yangdx committed · Commit 2494cef
Parent(s): 55aa606

Change log level from info to debug for token count logging

lightrag/operate.py CHANGED (+13 -13)
@@ -688,7 +688,7 @@ async def kg_query(
         return sys_prompt
 
     len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
-    logger.info(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+    logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
 
     response = await use_model_func(
         query,
@@ -776,7 +776,7 @@ async def extract_keywords_only(
     )
 
     len_of_prompts = len(encode_string_by_tiktoken(kw_prompt))
-    logger.info(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+    logger.debug(f"[kg_query]Prompt Tokens: {len_of_prompts}")
 
     # 5. Call the LLM for keyword extraction
     use_model_func = global_config["llm_model_func"]
@@ -941,7 +941,7 @@ async def mix_kg_vector_query(
                     chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}"
                 formatted_chunks.append(chunk_text)
 
-            logger.info(
+            logger.debug(
                 f"Truncate chunks from {len(chunks)} to {len(formatted_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
             )
             return "\n--New Chunk--\n".join(formatted_chunks)
@@ -977,7 +977,7 @@ async def mix_kg_vector_query(
         return sys_prompt
 
     len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
-    logger.info(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}")
+    logger.debug(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}")
 
     # 6. Generate response
     response = await use_model_func(
@@ -1102,7 +1102,7 @@ async def _build_query_context(
     entities_tokens = len(encode_string_by_tiktoken(entities_context))
     relations_tokens = len(encode_string_by_tiktoken(relations_context))
     text_units_tokens = len(encode_string_by_tiktoken(text_units_context))
-    logger.info(
+    logger.debug(
         f"Context Tokens - Total: {contex_tokens}, Entities: {entities_tokens}, Relations: {relations_tokens}, Chunks: {text_units_tokens}"
     )
 
@@ -1157,7 +1157,7 @@ async def _get_node_data(
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_local_context,
     )
-    logger.info(
+    logger.debug(
         f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
     )
 
@@ -1295,7 +1295,7 @@ async def _find_most_related_text_unit_from_entities(
         max_token_size=query_param.max_token_for_text_unit,
     )
 
-    logger.info(
+    logger.debug(
         f"Truncate chunks from {len(all_text_units_lookup)} to {len(all_text_units)} (max tokens:{query_param.max_token_for_text_unit})"
     )
 
@@ -1341,7 +1341,7 @@ async def _find_most_related_edges_from_entities(
         max_token_size=query_param.max_token_for_global_context,
    )
 
-    logger.info(
+    logger.debug(
         f"Truncate relations from {len(all_edges)} to {len(all_edges_data)} (max tokens:{query_param.max_token_for_global_context})"
     )
 
@@ -1398,7 +1398,7 @@ async def _get_edge_data(
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_global_context,
     )
-    logger.info(
+    logger.debug(
         f"Truncate relations from {len_edge_datas} to {len(edge_datas)} (max tokens:{query_param.max_token_for_global_context})"
     )
 
@@ -1506,7 +1506,7 @@ async def _find_most_related_entities_from_relationships(
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_local_context,
     )
-    logger.info(
+    logger.debug(
         f"Truncate entities from {len_node_datas} to {len(node_datas)} (max tokens:{query_param.max_token_for_local_context})"
     )
 
@@ -1564,7 +1564,7 @@ async def _find_related_text_unit_from_relationships(
         max_token_size=query_param.max_token_for_text_unit,
     )
 
-    logger.info(
+    logger.debug(
         f"Truncate chunks from {len(valid_text_units)} to {len(truncated_text_units)} (max tokens:{query_param.max_token_for_text_unit})"
     )
 
@@ -1635,7 +1635,7 @@ async def naive_query(
         logger.warning("No chunks left after truncation")
         return PROMPTS["fail_response"]
 
-    logger.info(
+    logger.debug(
         f"Truncate chunks from {len(chunks)} to {len(maybe_trun_chunks)} (max tokens:{query_param.max_token_for_text_unit})"
     )
 
@@ -1807,7 +1807,7 @@ async def kg_query_with_keywords(
         return sys_prompt
 
     len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
-    logger.info(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")
+    logger.debug(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")
 
     response = await use_model_func(
         query,
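
Note for consumers of these logs: after this commit the token-count and truncation messages are emitted at DEBUG level, so they no longer appear under a default INFO logging configuration. A minimal sketch of how to surface them again with Python's standard logging module, assuming the library logs through a logger named "lightrag" (the logger name is an assumption, not taken from this diff):

import logging

# Install a basic handler so records are printed at all.
logging.basicConfig(level=logging.DEBUG)

# Lower the threshold for the library logger so DEBUG records such as
# "[kg_query]Prompt Tokens: ..." are emitted again.
logging.getLogger("lightrag").setLevel(logging.DEBUG)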