Commit f28e08e · committed by yangdx
Parent: 87708b2

feat: move query-related settings to env file for better configuration

• Add env vars for token and chunk settings
• Add token count logging for prompts
• Add token count logging for context
• Move hardcoded values to env variables
• Improve logging clarity and consistency

Files changed (4)
  1. .env.example +13 -6
  2. lightrag/base.py +5 -3
  3. lightrag/lightrag.py +3 -3
  4. lightrag/operate.py +48 -5
.env.example CHANGED
@@ -27,14 +27,21 @@ TIMEOUT=300
 
 ### RAG Configuration
 MAX_ASYNC=4
-MAX_TOKENS=32768
 EMBEDDING_DIM=1024
 MAX_EMBED_TOKENS=8192
-#HISTORY_TURNS=3
-#CHUNK_SIZE=1200
-#CHUNK_OVERLAP_SIZE=100
-#COSINE_THRESHOLD=0.2
-#TOP_K=60
+### Settings relative to query
+HISTORY_TURNS=3
+COSINE_THRESHOLD=0.2
+TOP_K=60
+MAX_TOKEN_TEXT_CHUNK = 4000
+MAX_TOKEN_RELATION_DESC = 4000
+MAX_TOKEN_ENTITY_DESC = 4000
+### Settings relative to indexing
+CHUNK_SIZE=1200
+CHUNK_OVERLAP_SIZE=100
+MAX_TOKENS=32768
+MAX_TOKEN_SUMMARY=500
+SUMMARY_LANGUAGE=English
 
 ### LLM Configuration (Use valid host. For local services, you can use host.docker.internal)
 ### Ollama example
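
Because these values are read with `os.getenv()` as dataclass field defaults, they are fixed at the moment the `lightrag` package is imported. A minimal sketch of the intended workflow, assuming `python-dotenv` is installed (it is not part of this commit):

```python
# Load .env BEFORE importing lightrag: the os.getenv() defaults in
# QueryParam and LightRAG are evaluated when their classes are defined.
from dotenv import load_dotenv

load_dotenv()  # pulls TOP_K, MAX_TOKEN_TEXT_CHUNK, CHUNK_SIZE, ... into os.environ

from lightrag import LightRAG, QueryParam  # noqa: E402  (import after the env is set)
```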
lightrag/base.py CHANGED
@@ -54,13 +54,15 @@ class QueryParam:
     top_k: int = int(os.getenv("TOP_K", "60"))
     """Number of top items to retrieve. Represents entities in 'local' mode and relationships in 'global' mode."""
 
-    max_token_for_text_unit: int = 4000
+    max_token_for_text_unit: int = int(os.getenv("MAX_TOKEN_TEXT_CHUNK", "4000"))
     """Maximum number of tokens allowed for each retrieved text chunk."""
 
-    max_token_for_global_context: int = 4000
+    max_token_for_global_context: int = int(
+        os.getenv("MAX_TOKEN_RELATION_DESC", "4000")
+    )
     """Maximum number of tokens allocated for relationship descriptions in global retrieval."""
 
-    max_token_for_local_context: int = 4000
+    max_token_for_local_context: int = int(os.getenv("MAX_TOKEN_ENTITY_DESC", "4000"))
    """Maximum number of tokens allocated for entity descriptions in local retrieval."""
 
     hl_keywords: list[str] = field(default_factory=list)
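
The env vars only change the defaults; explicit arguments to `QueryParam` still take precedence. A hedged sketch (the field names come from the diff above, the values are illustrative):

```python
import os

# Illustrative values; they must be set before lightrag is first imported.
os.environ.setdefault("MAX_TOKEN_TEXT_CHUNK", "6000")
os.environ.setdefault("MAX_TOKEN_ENTITY_DESC", "3000")

from lightrag import QueryParam

param = QueryParam(mode="hybrid")       # picks up the env-driven defaults
print(param.max_token_for_text_unit)    # 6000, taken from MAX_TOKEN_TEXT_CHUNK

# An explicit argument overrides both the env var and the built-in default.
custom = QueryParam(mode="local", max_token_for_text_unit=2000)
print(custom.max_token_for_text_unit)   # 2000
```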
lightrag/lightrag.py CHANGED
@@ -263,10 +263,10 @@ class LightRAG:
     """Directory where logs are stored. Defaults to the current working directory."""
 
     # Text chunking
-    chunk_token_size: int = 1200
+    chunk_token_size: int = int(os.getenv("CHUNK_SIZE", "1200"))
     """Maximum number of tokens per text chunk when splitting documents."""
 
-    chunk_overlap_token_size: int = 100
+    chunk_overlap_token_size: int = int(os.getenv("CHUNK_OVERLAP_SIZE", "100"))
     """Number of overlapping tokens between consecutive text chunks to preserve context."""
 
     tiktoken_model_name: str = "gpt-4o-mini"
@@ -276,7 +276,7 @@ class LightRAG:
     entity_extract_max_gleaning: int = 1
     """Maximum number of entity extraction attempts for ambiguous content."""
 
-    entity_summary_to_max_tokens: int = 500
+    entity_summary_to_max_tokens: int = int(os.getenv("MAX_TOKEN_SUMMARY", "500"))
     """Maximum number of tokens used for summarizing extracted entities."""
 
     # Node embedding
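
The chunking settings follow the same pattern: the env var moves the default, while a value passed to the constructor still wins. A small self-contained sketch of the mechanism, using a hypothetical `ChunkingConfig` stand-in rather than LightRAG's actual class:

```python
import os
from dataclasses import dataclass


@dataclass
class ChunkingConfig:
    # Hypothetical stand-in mirroring the LightRAG fields above.
    # os.getenv() runs once, when the class body is executed, so
    # CHUNK_SIZE must already be in the environment at import time.
    chunk_token_size: int = int(os.getenv("CHUNK_SIZE", "1200"))
    chunk_overlap_token_size: int = int(os.getenv("CHUNK_OVERLAP_SIZE", "100"))


cfg = ChunkingConfig()                          # env-driven defaults
custom = ChunkingConfig(chunk_token_size=800)   # explicit value still wins
print(cfg.chunk_token_size, custom.chunk_token_size)
```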
lightrag/operate.py CHANGED
@@ -642,9 +642,13 @@ async def kg_query(
         history=history_context,
     )
 
+
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.info(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
@@ -730,6 +734,9 @@ async def extract_keywords_only(
         query=text, examples=examples, language=language, history=history_context
     )
 
+    len_of_prompts = len(encode_string_by_tiktoken(kw_prompt))
+    logger.info(f"[kg_query]Prompt Tokens: {len_of_prompts}")
+
     # 5. Call the LLM for keyword extraction
     use_model_func = global_config["llm_model_func"]
     result = await use_model_func(kw_prompt, keyword_extraction=True)
@@ -893,7 +900,9 @@ async def mix_kg_vector_query(
                     chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}"
                 formatted_chunks.append(chunk_text)
 
-            logger.info(f"Truncate {len(chunks)} to {len(formatted_chunks)} chunks")
+            logger.info(
+                f"Truncate text chunks from {len(chunks)} to {len(formatted_chunks)}"
+            )
             return "\n--New Chunk--\n".join(formatted_chunks)
         except Exception as e:
             logger.error(f"Error in get_vector_context: {e}")
@@ -926,6 +935,9 @@ async def mix_kg_vector_query(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.info(f"[mix_kg_vector_query]Prompt Tokens: {len_of_prompts}")
+
     # 6. Generate response
     response = await use_model_func(
         query,
@@ -1031,7 +1043,7 @@ async def _build_query_context(
     if not entities_context.strip() and not relations_context.strip():
         return None
 
-    return f"""
+    result = f"""
 -----Entities-----
 ```csv
 {entities_context}
@@ -1045,6 +1057,15 @@ async def _build_query_context(
 {text_units_context}
 ```
 """
+    contex_tokens = len(encode_string_by_tiktoken(result))
+    entities_tokens = len(encode_string_by_tiktoken(entities_context))
+    relations_tokens = len(encode_string_by_tiktoken(relations_context))
+    text_units_tokens = len(encode_string_by_tiktoken(text_units_context))
+    logger.info(
+        f"Context Tokens - Total: {contex_tokens}, Entities: {entities_tokens}, Relations: {relations_tokens}, Chunks: {text_units_tokens}"
+    )
+
+    return result
 
 
 async def _get_node_data(
@@ -1089,7 +1110,7 @@ async def _get_node_data(
         ),
     )
     logger.info(
-        f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} text units"
+        f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} chunks"
     )
 
     # build prompt
@@ -1222,6 +1243,10 @@ async def _find_most_related_text_unit_from_entities(
         max_token_size=query_param.max_token_for_text_unit,
     )
 
+    logger.info(
+        f"Truncate text chunks from {len(all_text_units_lookup)} to {len(all_text_units)}"
+    )
+
     all_text_units = [t["data"] for t in all_text_units]
     return all_text_units
 
@@ -1263,6 +1288,9 @@ async def _find_most_related_edges_from_entities(
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_global_context,
     )
+
+    logger.info(f"Truncate relations from {len(all_edges)} to {len(all_edges_data)}")
+
     return all_edges_data
 
 
@@ -1310,11 +1338,13 @@ async def _get_edge_data(
     edge_datas = sorted(
         edge_datas, key=lambda x: (x["rank"], x["weight"]), reverse=True
     )
+    len_edge_datas = len(edge_datas)
     edge_datas = truncate_list_by_token_size(
         edge_datas,
         key=lambda x: x["description"],
        max_token_size=query_param.max_token_for_global_context,
     )
+    logger.info(f"Truncate relations from {len_edge_datas} to {len(edge_datas)}")
 
     use_entities, use_text_units = await asyncio.gather(
         _find_most_related_entities_from_relationships(
@@ -1325,7 +1355,7 @@ async def _get_edge_data(
         ),
     )
     logger.info(
-        f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} text units"
+        f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
     )
 
     relations_section_list = [
@@ -1414,11 +1444,13 @@ async def _find_most_related_entities_from_relationships(
         for k, n, d in zip(entity_names, node_datas, node_degrees)
     ]
 
+    len_node_datas = len(node_datas)
     node_datas = truncate_list_by_token_size(
         node_datas,
         key=lambda x: x["description"],
         max_token_size=query_param.max_token_for_local_context,
     )
+    logger.info(f"Truncate entities from {len_node_datas} to {len(node_datas)}")
 
     return node_datas
 
@@ -1474,6 +1506,10 @@ async def _find_related_text_unit_from_relationships(
         max_token_size=query_param.max_token_for_text_unit,
     )
 
+    logger.info(
+        f"Truncate text chunks from {len(valid_text_units)} to {len(truncated_text_units)}"
+    )
+
     all_text_units: list[TextChunkSchema] = [t["data"] for t in truncated_text_units]
 
     return all_text_units
@@ -1541,7 +1577,8 @@ async def naive_query(
         logger.warning("No chunks left after truncation")
         return PROMPTS["fail_response"]
 
-    logger.info(f"Truncate {len(chunks)} to {len(maybe_trun_chunks)} chunks")
+    logger.info(f"Truncate text chunks from {len(chunks)} to {len(maybe_trun_chunks)}")
+
     section = "\n--New Chunk--\n".join([c["content"] for c in maybe_trun_chunks])
 
     if query_param.only_need_context:
@@ -1564,6 +1601,9 @@ async def naive_query(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.info(f"[naive_query]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
@@ -1706,6 +1746,9 @@ async def kg_query_with_keywords(
     if query_param.only_need_prompt:
         return sys_prompt
 
+    len_of_prompts = len(encode_string_by_tiktoken(query + sys_prompt))
+    logger.info(f"[kg_query_with_keywords]Prompt Tokens: {len_of_prompts}")
+
     response = await use_model_func(
         query,
         system_prompt=sys_prompt,
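
`encode_string_by_tiktoken` is LightRAG's existing tiktoken helper; the new log lines simply report token counts before each LLM call and after each truncation step. The sketch below reproduces the same measurement with `tiktoken` directly and is only illustrative:

```python
import tiktoken


def count_tokens(text: str, model_name: str = "gpt-4o-mini") -> int:
    """Rough equivalent of the prompt-token logging added in this commit."""
    try:
        encoding = tiktoken.encoding_for_model(model_name)
    except KeyError:
        # Older tiktoken releases may not know the model name yet.
        encoding = tiktoken.get_encoding("cl100k_base")
    return len(encoding.encode(text))


query = "What entities are related to X?"
sys_prompt = "You are a helpful assistant responding to the user query ..."
print(f"[kg_query]Prompt Tokens: {count_tokens(query + sys_prompt)}")
```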