yangdx committed
Commit · 6200fba
Parent(s): 27c5884

Fix timing calculation logic in OllamaAPI stream generators
• Initialize first_chunk_time as None
• Set timing only when first chunk arrives

- lightrag/api/ollama_api.py +10 -5
lightrag/api/ollama_api.py CHANGED

@@ -203,14 +203,15 @@ class OllamaAPI:
             )
 
         async def stream_generator():
-            first_chunk_time =
-            last_chunk_time =
+            first_chunk_time = None
+            last_chunk_time = time.time_ns()
             total_response = ""
 
             try:
                 # Ensure response is an async generator
                 if isinstance(response, str):
                     # If it's a string, send in two parts
+                    first_chunk_time = last_chunk_time
                     last_chunk_time = time.time_ns()
                     total_response = response
 
@@ -282,7 +283,8 @@ class OllamaAPI:
                     }
                     yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
                     return
-
+                if first_chunk_time is None:
+                    first_chunk_time = last_chunk_time
                 completion_tokens = estimate_tokens(total_response)
                 total_time = last_chunk_time - start_time
                 prompt_eval_time = first_chunk_time - start_time
@@ -407,14 +409,15 @@ class OllamaAPI:
             )
 
         async def stream_generator():
-            first_chunk_time =
-            last_chunk_time =
+            first_chunk_time = None
+            last_chunk_time = time.time_ns()
             total_response = ""
 
             try:
                 # Ensure response is an async generator
                 if isinstance(response, str):
                     # If it's a string, send in two parts
+                    first_chunk_time = last_chunk_time
                     last_chunk_time = time.time_ns()
                     total_response = response
 
@@ -499,6 +502,8 @@ class OllamaAPI:
                     yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
                     return
 
+                if first_chunk_time is None:
+                    first_chunk_time = last_chunk_time
                 completion_tokens = estimate_tokens(total_response)
                 total_time = last_chunk_time - start_time
                 prompt_eval_time = first_chunk_time - start_time
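For reference, below is a minimal sketch of the timing pattern this commit adopts (the helper names timed_stream and fake_chunks are illustrative, not code from the repository): first_chunk_time starts as None, is stamped only when the first chunk actually arrives, and falls back to last_chunk_time before the final statistics are computed, so the prompt-evaluation duration measures the wait for the first token instead of collapsing toward zero.

import asyncio
import time
from typing import AsyncIterator, Dict


async def timed_stream(chunks: AsyncIterator[str]) -> Dict[str, int]:
    # Illustrative only: mirrors the commit's timing pattern, not the
    # actual OllamaAPI.stream_generator implementation.
    start_time = time.time_ns()
    first_chunk_time = None            # set only when the first chunk arrives
    last_chunk_time = time.time_ns()   # fallback if nothing is ever yielded
    total_response = ""

    async for chunk in chunks:
        if first_chunk_time is None:
            first_chunk_time = time.time_ns()
        last_chunk_time = time.time_ns()
        total_response += chunk

    if first_chunk_time is None:       # empty stream: avoid arithmetic on None
        first_chunk_time = last_chunk_time

    return {
        "total_duration": last_chunk_time - start_time,
        "prompt_eval_duration": first_chunk_time - start_time,
        "eval_duration": last_chunk_time - first_chunk_time,
    }


async def _demo() -> None:
    async def fake_chunks() -> AsyncIterator[str]:
        for part in ("Hello", ", ", "world"):
            await asyncio.sleep(0.01)
            yield part

    print(await timed_stream(fake_chunks()))


if __name__ == "__main__":
    asyncio.run(_demo())

With the old initialization, first_chunk_time was set before any chunk was produced, so prompt_eval_time came out near zero; the None sentinel defers the stamp to the first yielded chunk, and the fallback keeps the subtraction safe when the stream ends without producing any chunks.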