yangdx committed
Commit f28e08e · Parent(s): 87708b2

feat: move query-related settings to env file for better configuration

• Add env vars for token and chunk settings
• Add token count logging for prompts
• Add token count logging for context
• Move hardcoded values to env variables
• Improve logging clarity and consistency

Files changed:
- .env.example +13 -6
- lightrag/base.py +5 -3
- lightrag/lightrag.py +3 -3
- lightrag/operate.py +48 -5
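Every default touched by this commit follows the same pattern: the hard-coded value becomes the fallback of an `os.getenv()` lookup, so the setting can be overridden from `.env` without code changes. A minimal sketch of that pattern (the two field names are taken from the diff below; the standalone script around them is illustrative):

```python
import os
from dataclasses import dataclass


@dataclass
class QueryParam:
    # Falls back to 60 unless TOP_K is set in the environment.
    top_k: int = int(os.getenv("TOP_K", "60"))
    # Falls back to 4000 unless MAX_TOKEN_TEXT_CHUNK is set.
    max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))


# Note: the getenv() calls run when the class body is evaluated (i.e. at import
# time), so the environment must be populated before the module is imported.
print(QueryParam())
```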
.env.example CHANGED
@@ -27,14 +27,21 @@ TIMEOUT=300
 
 ### RAG Configuration
 MAX_ASYNC=4
-MAX_TOKENS=32768
 EMBEDDING_DIM=1024
 MAX_EMBED_TOKENS=8192
-
-
-
-
-
+### Settings relative to query
+HISTORY_TURNS=3
+COSINE_THRESHOLD=0.2
+TOP_K=60
+MAX_TOKEN_TEXT_CHUNK = 4000
+MAX_TOKEN_RELATION_DESC = 4000
+MAX_TOKEN_ENTITY_DESC = 4000
+### Settings relative to indexing
+CHUNK_SIZE=1200
+CHUNK_OVERLAP_SIZE=100
+MAX_TOKENS=32768
+MAX_TOKEN_SUMMARY=500
+SUMMARY_LANGUAGE=English
 
 ### LLM Configuration (Use valid host. For local services, you can use host.docker.internal)
 ### Ollama example
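For reference, a quick way to check that the new query settings are picked up, assuming the file is loaded with python-dotenv (whether LightRAG loads `.env` itself depends on the entry point you use, so the `load_dotenv()` call here is an assumption):

```python
import os

from dotenv import load_dotenv  # pip install python-dotenv

load_dotenv(".env")  # copy .env.example to .env and adjust values first

print(int(os.getenv("TOP_K", "60")))                   # 60
print(float(os.getenv("COSINE_THRESHOLD", "0.2")))     # 0.2
print(int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000")))  # 4000
```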
lightrag/base.py CHANGED
@@ -54,13 +54,15 @@ class QueryParam:
     top_k: int = int(os.getenv("TOP_K", "60"))
     """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode."""
 
-    max_token_for_text_unit: int = 4000
+    max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
     """Maximum number of tokens allowed for each retrieved text chunk."""
 
-    max_token_for_global_context: int = 4000
+    max_token_for_global_context: int = int(
+        os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
+    )
     """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
 
-    max_token_for_local_context: int = 4000
+    max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
     """Maximum number of tokens allocated for entity descriptions in local retrieval."""
 
     hl_keywords: list[str] = field(default_factory=list)
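One consequence of env-backed dataclass defaults: the `os.getenv()` calls are evaluated when `lightrag/base.py` is imported, so environment overrides must be set before the import, while per-query overrides can still be passed to the constructor. A small sketch (the values are illustrative):

```python
import os

# Must be set before lightrag.base is imported for the default to change.
os.environ["MAX_TOKEN_TEXT_CHUNK"] = "2000"

from lightrag.base import QueryParam

p = QueryParam()                              # picks up 2000 from the environment
q = QueryParam(max_token_for_text_unit=6000)  # explicit per-query override
print(p.max_token_for_text_unit, q.max_token_for_text_unit)  # 2000 6000
```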
lightrag/lightrag.py CHANGED
@@ -263,10 +263,10 @@ class LightRAG:
     """Directory where logs are stored. Defaults to the current working directory."""
 
     # Text chunking
-    chunk_token_size: int = 1200
+    chunk_token_size: int = int(os.getenv("CHUNK_SIZE", "1200"))
     """Maximum number of tokens per text chunk when splitting documents."""
 
-    chunk_overlap_token_size: int = 100
+    chunk_overlap_token_size: int = int(os.getenv("CHUNK_OVERLAP_SIZE", "100"))
     """Number of overlapping tokens between consecutive text chunks to preserve context."""
 
     tiktoken_model_name: str = "gpt-4o-mini"
@@ -276,7 +276,7 @@ class LightRAG:
     entity_extract_max_gleaning: int = 1
     """Maximum number of entity extraction attempts for ambiguous content."""
 
-    entity_summary_to_max_tokens: int = 500
+    entity_summary_to_max_tokens: int = int(os.getenv("MAX_TOKEN_SUMMARY", "500"))
     """Maximum number of tokens used for summarizing extracted entities."""
 
     # Node embedding
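The same import-time caveat applies to the chunking defaults on the `LightRAG` dataclass; since they are plain field defaults, they are also visible as class attributes. A sketch, assuming `lightrag` has not already been imported earlier in the process:

```python
import os

os.environ["CHUNK_SIZE"] = "800"          # read when the module is imported
os.environ["CHUNK_OVERLAP_SIZE"] = "50"

from lightrag import LightRAG

print(LightRAG.chunk_token_size)              # 800
print(LightRAG.chunk_overlap_token_size)      # 50
print(LightRAG.entity_summary_to_max_tokens)  # 500 unless MAX_TOKEN_SUMMARY is set
```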
lightrag/operate.py CHANGED
@@ -642,9 +642,13 @@ async def kg_query(
         history=history_context,
     )
 
+
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.info(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
@@ -730,6 +734,9 @@ async def extract_keywords_only(
         query=text, examples=examples, language=language, history=history_context
     )
 
+    len_of_prompts = len(encode_string_by_tiktoken(kw_prompt))
+    logger.info(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+
     # 5. Call the LLM for keyword extraction
     use_model_func = global_config["llm_model_func"]
     result = await use_model_func(kw_prompt, keyword_extraction=True)
@@ -893,7 +900,9 @@ async def mix_kg_vector_query(
                     chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}"
                 formatted_chunks.append(chunk_text)
 
-            logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
+            logger.info(
+                f"Truncate text chunks from {len(chunks)} to {len(formatted_chunks)}"
+            )
             return "\n--New Chunk--\n".join(formatted_chunks)
         except Exception as e:
             logger.error(f"Error in get_vector_context: {e}")
@@ -926,6 +935,9 @@ async def mix_kg_vector_query(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.info(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}")
+
     # 6. Generate response
     response = await use_model_func(
         query,
@@ -1031,7 +1043,7 @@ async def _build_query_context(
     if not entities_context.strip() and not relations_context.strip():
         return None
 
-    return f"""
+    result = f"""
 -----Entities-----
 ```csv
 {entities_context}
@@ -1045,6 +1057,15 @@ async def _build_query_context(
 {text_units_context}
 ```
 """
+    contex_tokens = len(encode_string_by_tiktoken(result))
+    entities_tokens = len(encode_string_by_tiktoken(entities_context))
+    relations_tokens = len(encode_string_by_tiktoken(relations_context))
+    text_units_tokens = len(encode_string_by_tiktoken(text_units_context))
+    logger.info(
+        f"Context Tokens - Total: {contex_tokens}, Entities: {entities_tokens}, Relations: {relations_tokens}, Chunks: {text_units_tokens}"
+    )
+
+    return result
 
 
 async def _get_node_data(
@@ -1089,7 +1110,7 @@ async def _get_node_data(
         ),
     )
     logger.info(
-        f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} text units"
+        f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
     )
 
     # build prompt
@@ -1222,6 +1243,10 @@ async def _find_most_related_text_unit_from_entities(
         max_token_size=query_param.max_token_for_text_unit,
     )
 
+    logger.info(
+        f"Truncate text chunks from {len(all_text_units_lookup)} to {len(all_text_units)}"
+    )
+
     all_text_units = [t["data"] for t in all_text_units]
     return all_text_units
 
@@ -1263,6 +1288,9 @@ async def _find_most_related_edges_from_entities(
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_global_context,
     )
+
+    logger.info(f"Truncate relations from {len(all_edges)} to {len(all_edges_data)}")
+
     return all_edges_data
 
 
@@ -1310,11 +1338,13 @@ async def _get_edge_data(
     edge_datas = sorted(
         edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
     )
+    len_edge_datas = len(edge_datas)
     edge_datas = truncate_list_by_token_size(
         edge_datas,
         key=lambda x: x["description"],
        max_token_size=query_param.max_token_for_global_context,
     )
+    logger.info(f"Truncate relations from {len_edge_datas} to {len(edge_datas)}")
 
     use_entities, use_text_units = await asyncio.gather(
         _find_most_related_entities_from_relationships(
@@ -1325,7 +1355,7 @@ async def _get_edge_data(
         ),
     )
     logger.info(
-        f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} text units"
+        f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
     )
 
     relations_section_list = [
@@ -1414,11 +1444,13 @@ async def _find_most_related_entities_from_relationships(
         for k, n, d in zip(entity_names, node_datas, node_degrees)
     ]
 
+    len_node_datas = len(node_datas)
     node_datas = truncate_list_by_token_size(
         node_datas,
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_local_context,
     )
+    logger.info(f"Truncate entities from {len_node_datas} to {len(node_datas)}")
 
     return node_datas
 
@@ -1474,6 +1506,10 @@ async def _find_related_text_unit_from_relationships(
         max_token_size=query_param.max_token_for_text_unit,
     )
 
+    logger.info(
+        f"Truncate text chunks from {len(valid_text_units)} to {len(truncated_text_units)}"
+    )
+
     all_text_units: list[TextChunkSchema] = [t["data"] for t in truncated_text_units]
 
     return all_text_units
@@ -1541,7 +1577,8 @@ async def naive_query(
         logger.warning("No chunks left after truncation")
         return PROMPTS["fail_response"]
 
-    logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
+    logger.info(f"Truncate text chunks from {len(chunks)} to {len(maybe_trun_chunks)}")
+
     section = "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
 
     if query_param.only_need_context:
@@ -1564,6 +1601,9 @@ async def naive_query(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.info(f"[naive_query]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
@@ -1706,6 +1746,9 @@ async def kg_query_with_keywords(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.info(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
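The new log lines all measure prompt and context size with lightrag's `encode_string_by_tiktoken` helper (defined in `lightrag/utils.py`). A rough stand-alone equivalent using tiktoken directly, for readers who want to reproduce the numbers; the encoding name and sample strings below are illustrative, not necessarily what LightRAG uses:

```python
import logging

import tiktoken

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("lightrag")

# cl100k_base is chosen here purely as an example encoding.
_encoding = tiktoken.get_encoding("cl100k_base")


def log_prompt_tokens(tag: str, query: str, sys_prompt: str) -> int:
    """Mirror of the pattern added in this commit: count tokens, then log them."""
    n_tokens = len(_encoding.encode(query + sys_prompt))
    logger.info(f"[{tag}]Prompt Tokens: {n_tokens}")
    return n_tokens


log_prompt_tokens("kg_query", "What does this commit change?", "---Role--- You answer questions about a codebase.")
```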