Spaces:
Sleeping
Sleeping
Commit
•
b160907
1
Parent(s):
ca13c5e
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,5 @@
|
|
1 |
import time
|
2 |
import gradio as gr
|
3 |
-
from transformers import AutoTokenizer
|
4 |
import json
|
5 |
import requests
|
6 |
|
@@ -32,11 +31,6 @@ def update(prompt, llm, nctx, max_tokens):
|
|
32 |
|
33 |
answer = {}
|
34 |
|
35 |
-
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
|
36 |
-
|
37 |
-
# Tokenize the input text
|
38 |
-
tokenized_input = tokenizer.encode(prompt, return_tensors="pt")
|
39 |
-
|
40 |
# Measure processing time
|
41 |
start_time = time.time()
|
42 |
result = make_request_to_llm(llm, prompt, max_tokens, int(nctx)*1000)
|
@@ -47,8 +41,6 @@ def update(prompt, llm, nctx, max_tokens):
|
|
47 |
duration = end_time - start_time
|
48 |
answer['Duration'] = duration
|
49 |
print("Duration: "+str(duration))
|
50 |
-
tokens_per_second = len(tokenized_input) / duration
|
51 |
-
answer['Tokens Per Second'] = tokens_per_second
|
52 |
answer['answer'] = result
|
53 |
return json.dumps(answer)
|
54 |
|
|
|
1 |
import time
|
2 |
import gradio as gr
|
|
|
3 |
import json
|
4 |
import requests
|
5 |
|
|
|
31 |
|
32 |
answer = {}
|
33 |
|
|
|
|
|
|
|
|
|
|
|
34 |
# Measure processing time
|
35 |
start_time = time.time()
|
36 |
result = make_request_to_llm(llm, prompt, max_tokens, int(nctx)*1000)
|
|
|
41 |
duration = end_time - start_time
|
42 |
answer['Duration'] = duration
|
43 |
print("Duration: "+str(duration))
|
|
|
|
|
44 |
answer['answer'] = result
|
45 |
return json.dumps(answer)
|
46 |
|