eswardivi committed on
Commit
0f1f78e
1 Parent(s): 1290a74

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -9
app.py CHANGED
@@ -39,7 +39,6 @@ else:
39
 
40
  @spaces.GPU(duration=150)
41
  def chat(message, history, temperature,do_sample, max_tokens):
42
- start_time = time.time()
43
  chat = []
44
  for item in history:
45
  chat.append({"role": "user", "content": item[0]})
@@ -67,19 +66,12 @@ def chat(message, history, temperature,do_sample, max_tokens):
67
  t.start()
68
 
69
  partial_text = ""
70
- first_token_time = None
71
  for new_text in streamer:
72
- if not first_token_time:
73
- first_token_time = time.time() - start_time
74
  partial_text += new_text
75
  yield partial_text
76
 
77
- total_time = time.time() - start_time
78
  tokens = len(tok.tokenize(partial_text))
79
- tokens_per_second = tokens / total_time if total_time > 0 else 0
80
-
81
- timing_info = f"\n\nTime taken to first token: {first_token_time:.2f} seconds\nTokens per second: {tokens_per_second:.2f}"
82
- yield partial_text + timing_info
83
 
84
 
85
  demo = gr.ChatInterface(
 
39
 
40
  @spaces.GPU(duration=150)
41
  def chat(message, history, temperature,do_sample, max_tokens):
 
42
  chat = []
43
  for item in history:
44
  chat.append({"role": "user", "content": item[0]})
 
66
  t.start()
67
 
68
  partial_text = ""
 
69
  for new_text in streamer:
 
 
70
  partial_text += new_text
71
  yield partial_text
72
 
 
73
  tokens = len(tok.tokenize(partial_text))
74
+ yield partial_text
 
 
 
75
 
76
 
77
  demo = gr.ChatInterface(