daniellefranca96 committed on
Commit
b160907
1 Parent(s): ca13c5e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -8
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import time
2
  import gradio as gr
3
- from transformers import AutoTokenizer
4
  import json
5
  import requests
6
 
@@ -32,11 +31,6 @@ def update(prompt, llm, nctx, max_tokens):
32
 
33
  answer = {}
34
 
35
- tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
36
-
37
- # Tokenize the input text
38
- tokenized_input = tokenizer.encode(prompt, return_tensors="pt")
39
-
40
  # Measure processing time
41
  start_time = time.time()
42
  result = make_request_to_llm(llm, prompt, max_tokens, int(nctx)*1000)
@@ -47,8 +41,6 @@ def update(prompt, llm, nctx, max_tokens):
47
  duration = end_time - start_time
48
  answer['Duration'] = duration
49
  print("Duration: "+str(duration))
50
- tokens_per_second = len(tokenized_input) / duration
51
- answer['Tokens Per Second'] = tokens_per_second
52
  answer['answer'] = result
53
  return json.dumps(answer)
54
 
 
1
  import time
2
  import gradio as gr
 
3
  import json
4
  import requests
5
 
 
31
 
32
  answer = {}
33
 
 
 
 
 
 
34
  # Measure processing time
35
  start_time = time.time()
36
  result = make_request_to_llm(llm, prompt, max_tokens, int(nctx)*1000)
 
41
  duration = end_time - start_time
42
  answer['Duration'] = duration
43
  print("Duration: "+str(duration))
 
 
44
  answer['answer'] = result
45
  return json.dumps(answer)
46