Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -39,7 +39,6 @@ else:
|
|
39 |
|
40 |
@spaces.GPU(duration=150)
|
41 |
def chat(message, history, temperature,do_sample, max_tokens):
|
42 |
-
start_time = time.time()
|
43 |
chat = []
|
44 |
for item in history:
|
45 |
chat.append({"role": "user", "content": item[0]})
|
@@ -67,19 +66,12 @@ def chat(message, history, temperature,do_sample, max_tokens):
|
|
67 |
t.start()
|
68 |
|
69 |
partial_text = ""
|
70 |
-
first_token_time = None
|
71 |
for new_text in streamer:
|
72 |
-
if not first_token_time:
|
73 |
-
first_token_time = time.time() - start_time
|
74 |
partial_text += new_text
|
75 |
yield partial_text
|
76 |
|
77 |
-
total_time = time.time() - start_time
|
78 |
tokens = len(tok.tokenize(partial_text))
|
79 |
-
|
80 |
-
|
81 |
-
timing_info = f"\n\nTime taken to first token: {first_token_time:.2f} seconds\nTokens per second: {tokens_per_second:.2f}"
|
82 |
-
yield partial_text + timing_info
|
83 |
|
84 |
|
85 |
demo = gr.ChatInterface(
|
|
|
39 |
|
40 |
@spaces.GPU(duration=150)
|
41 |
def chat(message, history, temperature,do_sample, max_tokens):
|
|
|
42 |
chat = []
|
43 |
for item in history:
|
44 |
chat.append({"role": "user", "content": item[0]})
|
|
|
66 |
t.start()
|
67 |
|
68 |
partial_text = ""
|
|
|
69 |
for new_text in streamer:
|
|
|
|
|
70 |
partial_text += new_text
|
71 |
yield partial_text
|
72 |
|
|
|
73 |
tokens = len(tok.tokenize(partial_text))
|
74 |
+
yield partial_text
|
|
|
|
|
|
|
75 |
|
76 |
|
77 |
demo = gr.ChatInterface(
|