AIO_Chat

Running on Zero

eswardivi committed on May 3, 2024

Commit

0f1f78e

verified ·

1 Parent(s): 1290a74

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -39,7 +39,6 @@ else:
 @spaces.GPU(duration=150)
 def chat(message, history, temperature,do_sample, max_tokens):
-    start_time = time.time()
     chat = []
     for item in history:
         chat.append({"role": "user", "content": item[0]})
@@ -67,19 +66,12 @@ def chat(message, history, temperature,do_sample, max_tokens):
     t.start()
     partial_text = ""
-    first_token_time = None
     for new_text in streamer:
-        if not first_token_time:
-            first_token_time = time.time() - start_time
         partial_text += new_text
         yield partial_text
-    total_time = time.time() - start_time
     tokens = len(tok.tokenize(partial_text))
-    tokens_per_second = tokens / total_time if total_time > 0 else 0
-    timing_info = f"\n\nTime taken to first token: {first_token_time:.2f} seconds\nTokens per second: {tokens_per_second:.2f}"
-    yield partial_text +  timing_info
 demo = gr.ChatInterface(

 @spaces.GPU(duration=150)
 def chat(message, history, temperature,do_sample, max_tokens):
     chat = []
     for item in history:
         chat.append({"role": "user", "content": item[0]})
     t.start()
     partial_text = ""
     for new_text in streamer:
         partial_text += new_text
         yield partial_text
     tokens = len(tok.tokenize(partial_text))
+    yield partial_text
 demo = gr.ChatInterface(