yangdx committed on
Commit
6200fba
·
1 Parent(s): 27c5884

Fix timing calculation logic in OllamaAPI stream generators

Browse files

• Initialize first_chunk_time as None
• Set timing only when first chunk arrives

Files changed (1) hide show
  1. lightrag/api/ollama_api.py +10 -5
lightrag/api/ollama_api.py CHANGED
@@ -203,14 +203,15 @@ class OllamaAPI:
203
  )
204
 
205
  async def stream_generator():
206
- first_chunk_time = time.time_ns()
207
- last_chunk_time = first_chunk_time
208
  total_response = ""
209
 
210
  try:
211
  # Ensure response is an async generator
212
  if isinstance(response, str):
213
  # If it's a string, send in two parts
 
214
  last_chunk_time = time.time_ns()
215
  total_response = response
216
 
@@ -282,7 +283,8 @@ class OllamaAPI:
282
  }
283
  yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
284
  return
285
-
 
286
  completion_tokens = estimate_tokens(total_response)
287
  total_time = last_chunk_time - start_time
288
  prompt_eval_time = first_chunk_time - start_time
@@ -407,14 +409,15 @@ class OllamaAPI:
407
  )
408
 
409
  async def stream_generator():
410
- first_chunk_time = time.time_ns()
411
- last_chunk_time = first_chunk_time
412
  total_response = ""
413
 
414
  try:
415
  # Ensure response is an async generator
416
  if isinstance(response, str):
417
  # If it's a string, send in two parts
 
418
  last_chunk_time = time.time_ns()
419
  total_response = response
420
 
@@ -499,6 +502,8 @@ class OllamaAPI:
499
  yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
500
  return
501
 
 
 
502
  completion_tokens = estimate_tokens(total_response)
503
  total_time = last_chunk_time - start_time
504
  prompt_eval_time = first_chunk_time - start_time
 
203
  )
204
 
205
  async def stream_generator():
206
+ first_chunk_time = None
207
+ last_chunk_time = time.time_ns()
208
  total_response = ""
209
 
210
  try:
211
  # Ensure response is an async generator
212
  if isinstance(response, str):
213
  # If it's a string, send in two parts
214
+ first_chunk_time = last_chunk_time
215
  last_chunk_time = time.time_ns()
216
  total_response = response
217
 
 
283
  }
284
  yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
285
  return
286
+ if first_chunk_time is None:
287
+ first_chunk_time = last_chunk_time
288
  completion_tokens = estimate_tokens(total_response)
289
  total_time = last_chunk_time - start_time
290
  prompt_eval_time = first_chunk_time - start_time
 
409
  )
410
 
411
  async def stream_generator():
412
+ first_chunk_time = None
413
+ last_chunk_time = time.time_ns()
414
  total_response = ""
415
 
416
  try:
417
  # Ensure response is an async generator
418
  if isinstance(response, str):
419
  # If it's a string, send in two parts
420
+ first_chunk_time = last_chunk_time
421
  last_chunk_time = time.time_ns()
422
  total_response = response
423
 
 
502
  yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
503
  return
504
 
505
+ if first_chunk_time is None:
506
+ first_chunk_time = last_chunk_time
507
  completion_tokens = estimate_tokens(total_response)
508
  total_time = last_chunk_time - start_time
509
  prompt_eval_time = first_chunk_time - start_time