yangdx
committed
Commit · b59560d
Parent(s): 6200fba
fix: improve timing accuracy and variable scoping in OllamaAPI
lightrag/api/ollama_api.py +12 -12
lightrag/api/ollama_api.py
CHANGED
@@ -203,15 +203,15 @@ class OllamaAPI:
             )
 
         async def stream_generator():
-            first_chunk_time = None
-            last_chunk_time = time.time_ns()
-            total_response = ""
-
             try:
+                first_chunk_time = None
+                last_chunk_time = time.time_ns()
+                total_response = ""
+
                 # Ensure response is an async generator
                 if isinstance(response, str):
                     # If it's a string, send in two parts
-                    first_chunk_time =
+                    first_chunk_time = start_time
                     last_chunk_time = time.time_ns()
                     total_response = response
 
@@ -284,7 +284,7 @@ class OllamaAPI:
                         yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
                         return
                 if first_chunk_time is None:
-                    first_chunk_time =
+                    first_chunk_time = start_time
                 completion_tokens = estimate_tokens(total_response)
                 total_time = last_chunk_time - start_time
                 prompt_eval_time = first_chunk_time - start_time
@@ -409,15 +409,15 @@ class OllamaAPI:
             )
 
         async def stream_generator():
-            first_chunk_time = None
-            last_chunk_time = time.time_ns()
-            total_response = ""
-
             try:
+                first_chunk_time = None
+                last_chunk_time = time.time_ns()
+                total_response = ""
+
                 # Ensure response is an async generator
                 if isinstance(response, str):
                     # If it's a string, send in two parts
-                    first_chunk_time =
+                    first_chunk_time = start_time
                     last_chunk_time = time.time_ns()
                     total_response = response
 
@@ -503,7 +503,7 @@ class OllamaAPI:
                         return
 
                 if first_chunk_time is None:
-                    first_chunk_time =
+                    first_chunk_time = start_time
                 completion_tokens = estimate_tokens(total_response)
                 total_time = last_chunk_time - start_time
                 prompt_eval_time = first_chunk_time - start_time
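
In effect, the commit moves the per-stream timing state inside the try block and reuses start_time as the first-chunk timestamp whenever no chunk was actually streamed. Below is a minimal sketch of the resulting pattern, not the actual handler: it assumes start_time is a time.time_ns() value captured by the enclosing handler, stubs estimate_tokens, and uses illustrative field names in final_data.

import json
import time


def estimate_tokens(text: str) -> int:
    # Stand-in for the real helper in ollama_api.py (assumed, not shown in this diff).
    return max(1, len(text) // 4)


async def stream_generator(response, start_time: int):
    try:
        # Initialized inside the try block so the error path and the final
        # accounting below share one scope with well-defined values.
        first_chunk_time = None
        last_chunk_time = time.time_ns()
        total_response = ""

        if isinstance(response, str):
            # No real streaming happened: the whole answer was available at
            # request start, so start_time is the honest first-chunk time.
            first_chunk_time = start_time
            last_chunk_time = time.time_ns()
            total_response = response
            yield total_response

        if first_chunk_time is None:
            # Fallback keeps prompt_eval_time non-negative instead of
            # subtracting an unrelated timestamp.
            first_chunk_time = start_time

        completion_tokens = estimate_tokens(total_response)
        total_time = last_chunk_time - start_time          # nanoseconds, end to end
        prompt_eval_time = first_chunk_time - start_time   # nanoseconds to first chunk

        # Illustrative final payload; the real final_data fields are not shown here.
        final_data = {
            "completion_tokens": completion_tokens,
            "total_time": total_time,
            "prompt_eval_time": prompt_eval_time,
        }
        yield f"{json.dumps(final_data, ensure_ascii=False)}\n"
    except Exception:
        # The real handler reports the error to the client; re-raise in the sketch.
        raise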