Magicyuan committed on
Commit
6eb6fff
·
1 Parent(s): 5370a5b

Support multi-turn conversation

Files changed (5)
  1. README.md +28 -0
  2. lightrag/base.py +7 -0
  3. lightrag/operate.py +330 -337
  4. lightrag/prompt.py +26 -14
  5. lightrag/utils.py +81 -6
README.md CHANGED
@@ -119,6 +119,34 @@ print(rag.query("What are the top themes in this story?", param=QueryParam(mode=
119
  print(rag.query("What are the top themes in this story?", param=QueryParam(
120
  mode="mix")))
121
 
122
  ```
123
 
124
 
 
119
  print(rag.query("What are the top themes in this story?", param=QueryParam(
120
  mode="mix")))
121
 
122
+ ### Conversation History Support
123
+ LightRAG now supports multi-turn dialogue through the conversation history feature. Here's how to use it:
124
+
125
+ ```python
126
+ from lightrag import LightRAG, QueryParam
127
+
128
+ # Initialize LightRAG
129
+ rag = LightRAG(working_dir=WORKING_DIR)
130
+
131
+ # Create conversation history
132
+ conversation_history = [
133
+ {"role": "user", "content": "What is the main character's attitude towards Christmas?"},
134
+ {"role": "assistant", "content": "At the beginning of the story, Ebenezer Scrooge has a very negative attitude towards Christmas..."},
135
+ {"role": "user", "content": "How does his attitude change?"}
136
+ ]
137
+
138
+ # Create query parameters with conversation history
139
+ query_param = QueryParam(
140
+ mode="mix", # or any other mode: "local", "global", "hybrid"
141
+ conversation_history=conversation_history, # Add the conversation history
142
+ history_turns=3 # Number of recent conversation turns to consider
143
+ )
144
+
145
+ # Make a query that takes into account the conversation history
146
+ response = rag.query(
147
+ "What causes this change in his character?",
148
+ param=query_param
149
+ )
150
  ```
151
 
152
 
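For multi-turn use, the history list is simply carried forward between calls. A minimal sketch, assuming only the `QueryParam` fields shown above (the helper function below is illustrative, not part of the commit):

```python
from lightrag import LightRAG, QueryParam

rag = LightRAG(working_dir=WORKING_DIR)
conversation_history = []

def ask(question: str) -> str:
    # Query with the accumulated history, then record the new turn
    answer = rag.query(
        question,
        param=QueryParam(
            mode="mix",
            conversation_history=conversation_history,
            history_turns=3,
        ),
    )
    conversation_history.append({"role": "user", "content": question})
    conversation_history.append({"role": "assistant", "content": answer})
    return answer
```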
lightrag/base.py CHANGED
@@ -33,6 +33,13 @@ class QueryParam:
33
  max_token_for_local_context: int = 4000
34
  hl_keywords: list[str] = field(default_factory=list)
35
  ll_keywords: list[str] = field(default_factory=list)
36
 
37
 
38
  @dataclass
 
33
  max_token_for_local_context: int = 4000
34
  hl_keywords: list[str] = field(default_factory=list)
35
  ll_keywords: list[str] = field(default_factory=list)
36
+ # Conversation history support
37
+ conversation_history: list[dict] = field(
38
+ default_factory=list
39
+ ) # Format: [{"role": "user/assistant", "content": "message"}]
40
+ history_turns: int = (
41
+ 3 # Number of complete conversation turns (user-assistant pairs) to consider
42
+ )
43
 
44
 
45
  @dataclass
lightrag/operate.py CHANGED
@@ -21,6 +21,7 @@ from .utils import (
21
  save_to_cache,
22
  CacheData,
23
  statistic_data,
 
24
  )
25
  from .base import (
26
  BaseGraphStorage,
@@ -369,7 +370,7 @@ async def extract_entities(
369
 
370
  arg_hash = compute_args_hash(_prompt)
371
  cached_return, _1, _2, _3 = await handle_cache(
372
- llm_response_cache, arg_hash, _prompt, "default"
373
  )
374
  if need_to_restore:
375
  llm_response_cache.global_config = global_config
@@ -576,54 +577,19 @@ async def kg_query(
576
  ) -> str:
577
  # Handle cache
578
  use_model_func = global_config["llm_model_func"]
579
- args_hash = compute_args_hash(query_param.mode, query)
580
  cached_response, quantized, min_val, max_val = await handle_cache(
581
- hashing_kv, args_hash, query, query_param.mode
582
  )
583
  if cached_response is not None:
584
  return cached_response
585
 
586
- example_number = global_config["addon_params"].get("example_number", None)
587
- if example_number and example_number < len(PROMPTS["keywords_extraction_examples"]):
588
- examples = "\n".join(
589
- PROMPTS["keywords_extraction_examples"][: int(example_number)]
590
- )
591
- else:
592
- examples = "\n".join(PROMPTS["keywords_extraction_examples"])
593
- language = global_config["addon_params"].get(
594
- "language", PROMPTS["DEFAULT_LANGUAGE"]
595
  )
596
 
597
- # Set mode
598
- if query_param.mode not in ["local", "global", "hybrid"]:
599
- logger.error(f"Unknown mode {query_param.mode} in kg_query")
600
- return PROMPTS["fail_response"]
601
-
602
- # LLM generate keywords
603
- kw_prompt_temp = PROMPTS["keywords_extraction"]
604
- kw_prompt = kw_prompt_temp.format(query=query, examples=examples, language=language)
605
- result = await use_model_func(kw_prompt, keyword_extraction=True)
606
- logger.info("kw_prompt result:")
607
- print(result)
608
- try:
609
- # json_text = locate_json_string_body_from_string(result) # handled in use_model_func
610
- match = re.search(r"\{.*\}", result, re.DOTALL)
611
- if match:
612
- result = match.group(0)
613
- keywords_data = json.loads(result)
614
-
615
- hl_keywords = keywords_data.get("high_level_keywords", [])
616
- ll_keywords = keywords_data.get("low_level_keywords", [])
617
- else:
618
- logger.error("No JSON-like structure found in the result.")
619
- return PROMPTS["fail_response"]
620
-
621
- # Handle parsing error
622
- except json.JSONDecodeError as e:
623
- print(f"JSON parsing error: {e} {result}")
624
- return PROMPTS["fail_response"]
625
-
626
- # Handdle keywords missing
627
  if hl_keywords == [] and ll_keywords == []:
628
  logger.warning("low_level_keywords and high_level_keywords is empty")
629
  return PROMPTS["fail_response"]
@@ -660,149 +626,32 @@ async def kg_query(
660
  return context
661
  if context is None:
662
  return PROMPTS["fail_response"]
663
- sys_prompt_temp = PROMPTS["rag_response"]
664
- sys_prompt = sys_prompt_temp.format(
665
- context_data=context, response_type=query_param.response_type
666
- )
667
- if query_param.only_need_prompt:
668
- return sys_prompt
669
- response = await use_model_func(
670
- query,
671
- system_prompt=sys_prompt,
672
- stream=query_param.stream,
673
- )
674
- if isinstance(response, str) and len(response) > len(sys_prompt):
675
- response = (
676
- response.replace(sys_prompt, "")
677
- .replace("user", "")
678
- .replace("model", "")
679
- .replace(query, "")
680
- .replace("<system>", "")
681
- .replace("</system>", "")
682
- .strip()
683
- )
684
 
685
- # Save to cache
686
- await save_to_cache(
687
- hashing_kv,
688
- CacheData(
689
- args_hash=args_hash,
690
- content=response,
691
- prompt=query,
692
- quantized=quantized,
693
- min_val=min_val,
694
- max_val=max_val,
695
- mode=query_param.mode,
696
- ),
697
- )
698
- return response
699
-
700
-
701
- async def kg_query_with_keywords(
702
- query: str,
703
- knowledge_graph_inst: BaseGraphStorage,
704
- entities_vdb: BaseVectorStorage,
705
- relationships_vdb: BaseVectorStorage,
706
- text_chunks_db: BaseKVStorage[TextChunkSchema],
707
- query_param: QueryParam,
708
- global_config: dict,
709
- hashing_kv: BaseKVStorage = None,
710
- ) -> str:
711
- """
712
- Refactored kg_query that does NOT extract keywords by itself.
713
- It expects hl_keywords and ll_keywords to be set in query_param, or defaults to empty.
714
- Then it uses those to build context and produce a final LLM response.
715
- """
716
-
717
- # ---------------------------
718
- # 0) Handle potential cache
719
- # ---------------------------
720
- use_model_func = global_config["llm_model_func"]
721
- args_hash = compute_args_hash(query_param.mode, query)
722
- cached_response, quantized, min_val, max_val = await handle_cache(
723
- hashing_kv, args_hash, query, query_param.mode
724
- )
725
- if cached_response is not None:
726
- return cached_response
727
-
728
- # ---------------------------
729
- # 1) RETRIEVE KEYWORDS FROM query_param
730
- # ---------------------------
731
-
732
- # If these fields don't exist, default to empty lists/strings.
733
- hl_keywords = getattr(query_param, "hl_keywords", []) or []
734
- ll_keywords = getattr(query_param, "ll_keywords", []) or []
735
-
736
- # If neither has any keywords, you could handle that logic here.
737
- if not hl_keywords and not ll_keywords:
738
- logger.warning(
739
- "No keywords found in query_param. Could default to global mode or fail."
740
  )
741
- return PROMPTS["fail_response"]
742
- if not ll_keywords and query_param.mode in ["local", "hybrid"]:
743
- logger.warning("low_level_keywords is empty, switching to global mode.")
744
- query_param.mode = "global"
745
- if not hl_keywords and query_param.mode in ["global", "hybrid"]:
746
- logger.warning("high_level_keywords is empty, switching to local mode.")
747
- query_param.mode = "local"
748
-
749
- # Flatten low-level and high-level keywords if needed
750
- ll_keywords_flat = (
751
- [item for sublist in ll_keywords for item in sublist]
752
- if any(isinstance(i, list) for i in ll_keywords)
753
- else ll_keywords
754
- )
755
- hl_keywords_flat = (
756
- [item for sublist in hl_keywords for item in sublist]
757
- if any(isinstance(i, list) for i in hl_keywords)
758
- else hl_keywords
759
- )
760
-
761
- # Join the flattened lists
762
- ll_keywords_str = ", ".join(ll_keywords_flat) if ll_keywords_flat else ""
763
- hl_keywords_str = ", ".join(hl_keywords_flat) if hl_keywords_flat else ""
764
-
765
- keywords = [ll_keywords_str, hl_keywords_str]
766
 
767
- logger.info("Using %s mode for query processing", query_param.mode)
768
-
769
- # ---------------------------
770
- # 2) BUILD CONTEXT
771
- # ---------------------------
772
- context = await _build_query_context(
773
- keywords,
774
- knowledge_graph_inst,
775
- entities_vdb,
776
- relationships_vdb,
777
- text_chunks_db,
778
- query_param,
779
- )
780
- if not context:
781
- return PROMPTS["fail_response"]
782
-
783
- # If only context is needed, return it
784
- if query_param.only_need_context:
785
- return context
786
-
787
- # ---------------------------
788
- # 3) BUILD THE SYSTEM PROMPT + CALL LLM
789
- # ---------------------------
790
  sys_prompt_temp = PROMPTS["rag_response"]
791
  sys_prompt = sys_prompt_temp.format(
792
- context_data=context, response_type=query_param.response_type
 
 
793
  )
794
 
795
  if query_param.only_need_prompt:
796
  return sys_prompt
797
 
798
- # Now call the LLM with the final system prompt
799
  response = await use_model_func(
800
  query,
801
  system_prompt=sys_prompt,
802
  stream=query_param.stream,
803
  )
804
-
805
- # Clean up the response
806
  if isinstance(response, str) and len(response) > len(sys_prompt):
807
  response = (
808
  response.replace(sys_prompt, "")
@@ -814,9 +663,7 @@ async def kg_query_with_keywords(
814
  .strip()
815
  )
816
 
817
- # ---------------------------
818
- # 4) SAVE TO CACHE
819
- # ---------------------------
820
  await save_to_cache(
821
  hashing_kv,
822
  CacheData(
@@ -827,6 +674,7 @@ async def kg_query_with_keywords(
827
  min_val=min_val,
828
  max_val=max_val,
829
  mode=query_param.mode,
 
830
  ),
831
  )
832
  return response
@@ -844,22 +692,21 @@ async def extract_keywords_only(
844
  It ONLY extracts keywords (hl_keywords, ll_keywords).
845
  """
846
 
847
- # 1. Handle cache if needed
848
- args_hash = compute_args_hash(param.mode, text)
849
  cached_response, quantized, min_val, max_val = await handle_cache(
850
- hashing_kv, args_hash, text, param.mode
851
  )
852
  if cached_response is not None:
853
- # parse the cached_response if it’s JSON containing keywords
854
- # or simply return (hl_keywords, ll_keywords) from cached
855
- # Assuming cached_response is in the same JSON structure:
856
- match = re.search(r"\{.*\}", cached_response, re.DOTALL)
857
- if match:
858
- keywords_data = json.loads(match.group(0))
859
- hl_keywords = keywords_data.get("high_level_keywords", [])
860
- ll_keywords = keywords_data.get("low_level_keywords", [])
861
- return hl_keywords, ll_keywords
862
- return [], []
863
 
864
  # 2. Build the examples
865
  example_number = global_config["addon_params"].get("example_number", None)
@@ -873,15 +720,23 @@ async def extract_keywords_only(
873
  "language", PROMPTS["DEFAULT_LANGUAGE"]
874
  )
875
 
876
- # 3. Build the keyword-extraction prompt
877
- kw_prompt_temp = PROMPTS["keywords_extraction"]
878
- kw_prompt = kw_prompt_temp.format(query=text, examples=examples, language=language)
 
 
 
879
 
880
- # 4. Call the LLM for keyword extraction
881
  use_model_func = global_config["llm_model_func"]
882
  result = await use_model_func(kw_prompt, keyword_extraction=True)
883
 
884
- # 5. Parse out JSON from the LLM response
885
  match = re.search(r"\{.*\}", result, re.DOTALL)
886
  if not match:
887
  logger.error("No JSON-like structure found in the result.")
@@ -895,22 +750,225 @@ async def extract_keywords_only(
895
  hl_keywords = keywords_data.get("high_level_keywords", [])
896
  ll_keywords = keywords_data.get("low_level_keywords", [])
897
 
898
- # 6. Cache the result if needed
 
899
  await save_to_cache(
900
  hashing_kv,
901
  CacheData(
902
  args_hash=args_hash,
903
- content=result,
904
  prompt=text,
905
  quantized=quantized,
906
  min_val=min_val,
907
  max_val=max_val,
908
  mode=param.mode,
 
909
  ),
910
  )
911
  return hl_keywords, ll_keywords
912
 
913
914
  async def _build_query_context(
915
  query: list,
916
  knowledge_graph_inst: BaseGraphStorage,
@@ -1407,9 +1465,9 @@ async def naive_query(
1407
  ):
1408
  # Handle cache
1409
  use_model_func = global_config["llm_model_func"]
1410
- args_hash = compute_args_hash(query_param.mode, query)
1411
  cached_response, quantized, min_val, max_val = await handle_cache(
1412
- hashing_kv, args_hash, query, query_param.mode
1413
  )
1414
  if cached_response is not None:
1415
  return cached_response
@@ -1482,190 +1540,125 @@ async def naive_query(
1482
  min_val=min_val,
1483
  max_val=max_val,
1484
  mode=query_param.mode,
 
1485
  ),
1486
  )
1487
 
1488
  return response
1489
 
1490
 
1491
- async def mix_kg_vector_query(
1492
- query,
1493
  knowledge_graph_inst: BaseGraphStorage,
1494
  entities_vdb: BaseVectorStorage,
1495
  relationships_vdb: BaseVectorStorage,
1496
- chunks_vdb: BaseVectorStorage,
1497
  text_chunks_db: BaseKVStorage[TextChunkSchema],
1498
  query_param: QueryParam,
1499
  global_config: dict,
1500
  hashing_kv: BaseKVStorage = None,
1501
  ) -> str:
1502
  """
1503
- Hybrid retrieval implementation combining knowledge graph and vector search.
1504
-
1505
- This function performs a hybrid search by:
1506
- 1. Extracting semantic information from knowledge graph
1507
- 2. Retrieving relevant text chunks through vector similarity
1508
- 3. Combining both results for comprehensive answer generation
1509
  """
1510
- # 1. Cache handling
 
 
 
1511
  use_model_func = global_config["llm_model_func"]
1512
- args_hash = compute_args_hash("mix", query)
1513
  cached_response, quantized, min_val, max_val = await handle_cache(
1514
- hashing_kv, args_hash, query, "mix"
1515
  )
1516
  if cached_response is not None:
1517
  return cached_response
1518
 
1519
- # 2. Execute knowledge graph and vector searches in parallel
1520
- async def get_kg_context():
1521
- try:
1522
- # Reuse keyword extraction logic from kg_query
1523
- example_number = global_config["addon_params"].get("example_number", None)
1524
- if example_number and example_number < len(
1525
- PROMPTS["keywords_extraction_examples"]
1526
- ):
1527
- examples = "\n".join(
1528
- PROMPTS["keywords_extraction_examples"][: int(example_number)]
1529
- )
1530
- else:
1531
- examples = "\n".join(PROMPTS["keywords_extraction_examples"])
1532
-
1533
- language = global_config["addon_params"].get(
1534
- "language", PROMPTS["DEFAULT_LANGUAGE"]
1535
- )
1536
-
1537
- # Extract keywords using LLM
1538
- kw_prompt = PROMPTS["keywords_extraction"].format(
1539
- query=query, examples=examples, language=language
1540
- )
1541
- result = await use_model_func(kw_prompt, keyword_extraction=True)
1542
-
1543
- match = re.search(r"\{.*\}", result, re.DOTALL)
1544
- if not match:
1545
- logger.warning(
1546
- "No JSON-like structure found in keywords extraction result"
1547
- )
1548
- return None
1549
-
1550
- result = match.group(0)
1551
- keywords_data = json.loads(result)
1552
- hl_keywords = keywords_data.get("high_level_keywords", [])
1553
- ll_keywords = keywords_data.get("low_level_keywords", [])
1554
-
1555
- if not hl_keywords and not ll_keywords:
1556
- logger.warning("Both high-level and low-level keywords are empty")
1557
- return None
1558
-
1559
- # Convert keyword lists to strings
1560
- ll_keywords_str = ", ".join(ll_keywords) if ll_keywords else ""
1561
- hl_keywords_str = ", ".join(hl_keywords) if hl_keywords else ""
1562
-
1563
- # Set query mode based on available keywords
1564
- if not ll_keywords_str and not hl_keywords_str:
1565
- return None
1566
- elif not ll_keywords_str:
1567
- query_param.mode = "global"
1568
- elif not hl_keywords_str:
1569
- query_param.mode = "local"
1570
- else:
1571
- query_param.mode = "hybrid"
1572
-
1573
- # Build knowledge graph context
1574
- context = await _build_query_context(
1575
- [ll_keywords_str, hl_keywords_str],
1576
- knowledge_graph_inst,
1577
- entities_vdb,
1578
- relationships_vdb,
1579
- text_chunks_db,
1580
- query_param,
1581
- )
1582
-
1583
- return context
1584
-
1585
- except Exception as e:
1586
- logger.error(f"Error in get_kg_context: {str(e)}")
1587
- return None
1588
-
1589
- async def get_vector_context():
1590
- # Reuse vector search logic from naive_query
1591
- try:
1592
- # Reduce top_k for vector search in hybrid mode since we have structured information from KG
1593
- mix_topk = min(10, query_param.top_k)
1594
- results = await chunks_vdb.query(query, top_k=mix_topk)
1595
- if not results:
1596
- return None
1597
-
1598
- chunks_ids = [r["id"] for r in results]
1599
- chunks = await text_chunks_db.get_by_ids(chunks_ids)
1600
 
1601
- valid_chunks = []
1602
- for chunk, result in zip(chunks, results):
1603
- if chunk is not None and "content" in chunk:
1604
- # Merge chunk content and time metadata
1605
- chunk_with_time = {
1606
- "content": chunk["content"],
1607
- "created_at": result.get("created_at", None),
1608
- }
1609
- valid_chunks.append(chunk_with_time)
1610
 
1611
- if not valid_chunks:
1612
- return None
1613
 
1614
- maybe_trun_chunks = truncate_list_by_token_size(
1615
- valid_chunks,
1616
- key=lambda x: x["content"],
1617
- max_token_size=query_param.max_token_for_text_unit,
1618
- )
1619
 
1620
- if not maybe_trun_chunks:
1621
- return None
 
1622
 
1623
- # Include time information in content
1624
- formatted_chunks = []
1625
- for c in maybe_trun_chunks:
1626
- chunk_text = c["content"]
1627
- if c["created_at"]:
1628
- chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}"
1629
- formatted_chunks.append(chunk_text)
1630
 
1631
- return "\n--New Chunk--\n".join(formatted_chunks)
1632
- except Exception as e:
1633
- logger.error(f"Error in get_vector_context: {e}")
1634
- return None
1635
 
1636
- # 3. Execute both retrievals in parallel
1637
- kg_context, vector_context = await asyncio.gather(
1638
- get_kg_context(), get_vector_context()
1639
  )
1640
-
1641
- # 4. Merge contexts
1642
- if kg_context is None and vector_context is None:
1643
  return PROMPTS["fail_response"]
1644
 
 
1645
  if query_param.only_need_context:
1646
- return {"kg_context": kg_context, "vector_context": vector_context}
1647
 
1648
- # 5. Construct hybrid prompt
1649
- sys_prompt = PROMPTS["mix_rag_response"].format(
1650
- kg_context=kg_context
1651
- if kg_context
1652
- else "No relevant knowledge graph information found",
1653
- vector_context=vector_context
1654
- if vector_context
1655
- else "No relevant text information found",
1656
  response_type=query_param.response_type,
 
1657
  )
1658
 
1659
  if query_param.only_need_prompt:
1660
  return sys_prompt
1661
 
1662
- # 6. Generate response
1663
  response = await use_model_func(
1664
  query,
1665
  system_prompt=sys_prompt,
1666
  stream=query_param.stream,
1667
  )
1668
-
1669
  if isinstance(response, str) and len(response) > len(sys_prompt):
1670
  response = (
1671
  response.replace(sys_prompt, "")
@@ -1677,7 +1670,7 @@ async def mix_kg_vector_query(
1677
  .strip()
1678
  )
1679
 
1680
- # 7. Save cache
1681
  await save_to_cache(
1682
  hashing_kv,
1683
  CacheData(
@@ -1687,8 +1680,8 @@ async def mix_kg_vector_query(
1687
  quantized=quantized,
1688
  min_val=min_val,
1689
  max_val=max_val,
1690
- mode="mix",
 
1691
  ),
1692
  )
1693
-
1694
  return response
 
21
  save_to_cache,
22
  CacheData,
23
  statistic_data,
24
+ get_conversation_turns,
25
  )
26
  from .base import (
27
  BaseGraphStorage,
 
370
 
371
  arg_hash = compute_args_hash(_prompt)
372
  cached_return, _1, _2, _3 = await handle_cache(
373
+ llm_response_cache, arg_hash, _prompt, "default", cache_type="default"
374
  )
375
  if need_to_restore:
376
  llm_response_cache.global_config = global_config
 
577
  ) -> str:
578
  # Handle cache
579
  use_model_func = global_config["llm_model_func"]
580
+ args_hash = compute_args_hash(query_param.mode, query, cache_type="query")
581
  cached_response, quantized, min_val, max_val = await handle_cache(
582
+ hashing_kv, args_hash, query, query_param.mode, cache_type="query"
583
  )
584
  if cached_response is not None:
585
  return cached_response
586
 
587
+ # Extract keywords using extract_keywords_only function which already supports conversation history
588
+ hl_keywords, ll_keywords = await extract_keywords_only(
589
+ query, query_param, global_config, hashing_kv
590
  )
591
 
592
+ # Handle empty keywords
593
  if hl_keywords == [] and ll_keywords == []:
594
  logger.warning("low_level_keywords and high_level_keywords is empty")
595
  return PROMPTS["fail_response"]
 
626
  return context
627
  if context is None:
628
  return PROMPTS["fail_response"]
629
 
630
+ # Process conversation history
631
+ history_context = ""
632
+ if query_param.conversation_history:
633
+ history_context = get_conversation_turns(
634
+ query_param.conversation_history, query_param.history_turns
635
+ )
639
 
640
  sys_prompt_temp = PROMPTS["rag_response"]
641
  sys_prompt = sys_prompt_temp.format(
642
+ context_data=context,
643
+ response_type=query_param.response_type,
644
+ history=history_context,
645
  )
646
 
647
  if query_param.only_need_prompt:
648
  return sys_prompt
649
 
 
650
  response = await use_model_func(
651
  query,
652
  system_prompt=sys_prompt,
653
  stream=query_param.stream,
654
  )
 
 
655
  if isinstance(response, str) and len(response) > len(sys_prompt):
656
  response = (
657
  response.replace(sys_prompt, "")
 
663
  .strip()
664
  )
665
 
666
+ # Save to cache
 
 
667
  await save_to_cache(
668
  hashing_kv,
669
  CacheData(
 
674
  min_val=min_val,
675
  max_val=max_val,
676
  mode=query_param.mode,
677
+ cache_type="query",
678
  ),
679
  )
680
  return response
 
692
  It ONLY extracts keywords (hl_keywords, ll_keywords).
693
  """
694
 
695
+ # 1. Handle cache if needed - add cache type for keywords
696
+ args_hash = compute_args_hash(param.mode, text, cache_type="keywords")
697
  cached_response, quantized, min_val, max_val = await handle_cache(
698
+ hashing_kv, args_hash, text, param.mode, cache_type="keywords"
699
  )
700
  if cached_response is not None:
701
+ try:
702
+ keywords_data = json.loads(cached_response)
703
+ return keywords_data["high_level_keywords"], keywords_data[
704
+ "low_level_keywords"
705
+ ]
706
+ except (json.JSONDecodeError, KeyError):
707
+ logger.warning(
708
+ "Invalid cache format for keywords, proceeding with extraction"
709
+ )
 
710
 
711
  # 2. Build the examples
712
  example_number = global_config["addon_params"].get("example_number", None)
 
720
  "language", PROMPTS["DEFAULT_LANGUAGE"]
721
  )
722
 
723
+ # 3. Process conversation history
724
+ history_context = ""
725
+ if param.conversation_history:
726
+ history_context = get_conversation_turns(
727
+ param.conversation_history, param.history_turns
728
+ )
729
 
730
+ # 4. Build the keyword-extraction prompt
731
+ kw_prompt = PROMPTS["keywords_extraction"].format(
732
+ query=text, examples=examples, language=language, history=history_context
733
+ )
734
+
735
+ # 5. Call the LLM for keyword extraction
736
  use_model_func = global_config["llm_model_func"]
737
  result = await use_model_func(kw_prompt, keyword_extraction=True)
738
 
739
+ # 6. Parse out JSON from the LLM response
740
  match = re.search(r"\{.*\}", result, re.DOTALL)
741
  if not match:
742
  logger.error("No JSON-like structure found in the result.")
 
750
  hl_keywords = keywords_data.get("high_level_keywords", [])
751
  ll_keywords = keywords_data.get("low_level_keywords", [])
752
 
753
+ # 7. Cache only the processed keywords with cache type
754
+ cache_data = {"high_level_keywords": hl_keywords, "low_level_keywords": ll_keywords}
755
  await save_to_cache(
756
  hashing_kv,
757
  CacheData(
758
  args_hash=args_hash,
759
+ content=json.dumps(cache_data),
760
  prompt=text,
761
  quantized=quantized,
762
  min_val=min_val,
763
  max_val=max_val,
764
  mode=param.mode,
765
+ cache_type="keywords",
766
  ),
767
  )
768
  return hl_keywords, ll_keywords
769
 
770
 
771
+ async def mix_kg_vector_query(
772
+ query: str,
773
+ knowledge_graph_inst: BaseGraphStorage,
774
+ entities_vdb: BaseVectorStorage,
775
+ relationships_vdb: BaseVectorStorage,
776
+ chunks_vdb: BaseVectorStorage,
777
+ text_chunks_db: BaseKVStorage[TextChunkSchema],
778
+ query_param: QueryParam,
779
+ global_config: dict,
780
+ hashing_kv: BaseKVStorage = None,
781
+ ) -> str:
782
+ """
783
+ Hybrid retrieval implementation combining knowledge graph and vector search.
784
+
785
+ This function performs a hybrid search by:
786
+ 1. Extracting semantic information from knowledge graph
787
+ 2. Retrieving relevant text chunks through vector similarity
788
+ 3. Combining both results for comprehensive answer generation
789
+ """
790
+ # 1. Cache handling
791
+ use_model_func = global_config["llm_model_func"]
792
+ args_hash = compute_args_hash("mix", query, cache_type="query")
793
+ cached_response, quantized, min_val, max_val = await handle_cache(
794
+ hashing_kv, args_hash, query, "mix", cache_type="query"
795
+ )
796
+ if cached_response is not None:
797
+ return cached_response
798
+
799
+ # Process conversation history
800
+ history_context = ""
801
+ if query_param.conversation_history:
802
+ history_context = get_conversation_turns(
803
+ query_param.conversation_history, query_param.history_turns
804
+ )
805
+
806
+ # 2. Execute knowledge graph and vector searches in parallel
807
+ async def get_kg_context():
808
+ try:
809
+ # Extract keywords using extract_keywords_only function which already supports conversation history
810
+ hl_keywords, ll_keywords = await extract_keywords_only(
811
+ query, query_param, global_config, hashing_kv
812
+ )
813
+
814
+ if not hl_keywords and not ll_keywords:
815
+ logger.warning("Both high-level and low-level keywords are empty")
816
+ return None
817
+
818
+ # Convert keyword lists to strings
819
+ ll_keywords_str = ", ".join(ll_keywords) if ll_keywords else ""
820
+ hl_keywords_str = ", ".join(hl_keywords) if hl_keywords else ""
821
+
822
+ # Set query mode based on available keywords
823
+ if not ll_keywords_str and not hl_keywords_str:
824
+ return None
825
+ elif not ll_keywords_str:
826
+ query_param.mode = "global"
827
+ elif not hl_keywords_str:
828
+ query_param.mode = "local"
829
+ else:
830
+ query_param.mode = "hybrid"
831
+
832
+ # Build knowledge graph context
833
+ context = await _build_query_context(
834
+ [ll_keywords_str, hl_keywords_str],
835
+ knowledge_graph_inst,
836
+ entities_vdb,
837
+ relationships_vdb,
838
+ text_chunks_db,
839
+ query_param,
840
+ )
841
+
842
+ return context
843
+
844
+ except Exception as e:
845
+ logger.error(f"Error in get_kg_context: {str(e)}")
846
+ return None
847
+
848
+ async def get_vector_context():
849
+ # Consider conversation history in vector search
850
+ augmented_query = query
851
+ if history_context:
852
+ augmented_query = f"{history_context}\n{query}"
853
+
854
+ try:
855
+ # Reduce top_k for vector search in hybrid mode since we have structured information from KG
856
+ mix_topk = min(10, query_param.top_k)
857
+ results = await chunks_vdb.query(augmented_query, top_k=mix_topk)
858
+ if not results:
859
+ return None
860
+
861
+ chunks_ids = [r["id"] for r in results]
862
+ chunks = await text_chunks_db.get_by_ids(chunks_ids)
863
+
864
+ valid_chunks = []
865
+ for chunk, result in zip(chunks, results):
866
+ if chunk is not None and "content" in chunk:
867
+ # Merge chunk content and time metadata
868
+ chunk_with_time = {
869
+ "content": chunk["content"],
870
+ "created_at": result.get("created_at", None),
871
+ }
872
+ valid_chunks.append(chunk_with_time)
873
+
874
+ if not valid_chunks:
875
+ return None
876
+
877
+ maybe_trun_chunks = truncate_list_by_token_size(
878
+ valid_chunks,
879
+ key=lambda x: x["content"],
880
+ max_token_size=query_param.max_token_for_text_unit,
881
+ )
882
+
883
+ if not maybe_trun_chunks:
884
+ return None
885
+
886
+ # Include time information in content
887
+ formatted_chunks = []
888
+ for c in maybe_trun_chunks:
889
+ chunk_text = c["content"]
890
+ if c["created_at"]:
891
+ chunk_text = f"[Created at: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(c['created_at']))}]\n{chunk_text}"
892
+ formatted_chunks.append(chunk_text)
893
+
894
+ return "\n--New Chunk--\n".join(formatted_chunks)
895
+ except Exception as e:
896
+ logger.error(f"Error in get_vector_context: {e}")
897
+ return None
898
+
899
+ # 3. Execute both retrievals in parallel
900
+ kg_context, vector_context = await asyncio.gather(
901
+ get_kg_context(), get_vector_context()
902
+ )
903
+
904
+ # 4. Merge contexts
905
+ if kg_context is None and vector_context is None:
906
+ return PROMPTS["fail_response"]
907
+
908
+ if query_param.only_need_context:
909
+ return {"kg_context": kg_context, "vector_context": vector_context}
910
+
911
+ # 5. Construct hybrid prompt
912
+ sys_prompt = PROMPTS["mix_rag_response"].format(
913
+ kg_context=kg_context
914
+ if kg_context
915
+ else "No relevant knowledge graph information found",
916
+ vector_context=vector_context
917
+ if vector_context
918
+ else "No relevant text information found",
919
+ response_type=query_param.response_type,
920
+ history=history_context,
921
+ )
922
+
923
+ if query_param.only_need_prompt:
924
+ return sys_prompt
925
+
926
+ # 6. Generate response
927
+ response = await use_model_func(
928
+ query,
929
+ system_prompt=sys_prompt,
930
+ stream=query_param.stream,
931
+ )
932
+
933
+ if query_param.stream:
934
+ # For streaming responses, collect the full response first
935
+ full_response = []
936
+ async for chunk in response:
937
+ full_response.append(chunk)
938
+
939
+ # Combine the full response for caching
940
+ response = "".join(full_response)
941
+
942
+ # Clean up the response content
943
+ if isinstance(response, str) and len(response) > len(sys_prompt):
944
+ response = (
945
+ response.replace(sys_prompt, "")
946
+ .replace("user", "")
947
+ .replace("model", "")
948
+ .replace(query, "")
949
+ .replace("<system>", "")
950
+ .replace("</system>", "")
951
+ .strip()
952
+ )
953
+
954
+ # 7. Save cache - only after the full response has been collected
955
+ await save_to_cache(
956
+ hashing_kv,
957
+ CacheData(
958
+ args_hash=args_hash,
959
+ content=response,
960
+ prompt=query,
961
+ quantized=quantized,
962
+ min_val=min_val,
963
+ max_val=max_val,
964
+ mode="mix",
965
+ cache_type="query",
966
+ ),
967
+ )
968
+
969
+ return response
970
+
971
+
972
  async def _build_query_context(
973
  query: list,
974
  knowledge_graph_inst: BaseGraphStorage,
 
1465
  ):
1466
  # Handle cache
1467
  use_model_func = global_config["llm_model_func"]
1468
+ args_hash = compute_args_hash(query_param.mode, query, cache_type="query")
1469
  cached_response, quantized, min_val, max_val = await handle_cache(
1470
+ hashing_kv, args_hash, query, "default", cache_type="query"
1471
  )
1472
  if cached_response is not None:
1473
  return cached_response
 
1540
  min_val=min_val,
1541
  max_val=max_val,
1542
  mode=query_param.mode,
1543
+ cache_type="query",
1544
  ),
1545
  )
1546
 
1547
  return response
1548
 
1549
 
1550
+ async def kg_query_with_keywords(
1551
+ query: str,
1552
  knowledge_graph_inst: BaseGraphStorage,
1553
  entities_vdb: BaseVectorStorage,
1554
  relationships_vdb: BaseVectorStorage,
 
1555
  text_chunks_db: BaseKVStorage[TextChunkSchema],
1556
  query_param: QueryParam,
1557
  global_config: dict,
1558
  hashing_kv: BaseKVStorage = None,
1559
  ) -> str:
1560
  """
1561
+ Refactored kg_query that does NOT extract keywords by itself.
1562
+ It expects hl_keywords and ll_keywords to be set in query_param, or defaults to empty.
1563
+ Then it uses those to build context and produce a final LLM response.
 
 
 
1564
  """
1565
+
1566
+ # ---------------------------
1567
+ # 1) Handle potential cache for query results
1568
+ # ---------------------------
1569
  use_model_func = global_config["llm_model_func"]
1570
+ args_hash = compute_args_hash(query_param.mode, query, cache_type="query")
1571
  cached_response, quantized, min_val, max_val = await handle_cache(
1572
+ hashing_kv, args_hash, query, query_param.mode, cache_type="query"
1573
  )
1574
  if cached_response is not None:
1575
  return cached_response
1576
 
1577
+ # ---------------------------
1578
+ # 2) RETRIEVE KEYWORDS FROM query_param
1579
+ # ---------------------------
1580
 
1581
+ # If these fields don't exist, default to empty lists/strings.
1582
+ hl_keywords = getattr(query_param, "hl_keywords", []) or []
1583
+ ll_keywords = getattr(query_param, "ll_keywords", []) or []
1584
 
1585
+ # If neither has any keywords, you could handle that logic here.
1586
+ if not hl_keywords and not ll_keywords:
1587
+ logger.warning(
1588
+ "No keywords found in query_param. Could default to global mode or fail."
1589
+ )
1590
+ return PROMPTS["fail_response"]
1591
+ if not ll_keywords and query_param.mode in ["local", "hybrid"]:
1592
+ logger.warning("low_level_keywords is empty, switching to global mode.")
1593
+ query_param.mode = "global"
1594
+ if not hl_keywords and query_param.mode in ["global", "hybrid"]:
1595
+ logger.warning("high_level_keywords is empty, switching to local mode.")
1596
+ query_param.mode = "local"
1597
 
1598
+ # Flatten low-level and high-level keywords if needed
1599
+ ll_keywords_flat = (
1600
+ [item for sublist in ll_keywords for item in sublist]
1601
+ if any(isinstance(i, list) for i in ll_keywords)
1602
+ else ll_keywords
1603
+ )
1604
+ hl_keywords_flat = (
1605
+ [item for sublist in hl_keywords for item in sublist]
1606
+ if any(isinstance(i, list) for i in hl_keywords)
1607
+ else hl_keywords
1608
+ )
1609
 
1610
+ # Join the flattened lists
1611
+ ll_keywords_str = ", ".join(ll_keywords_flat) if ll_keywords_flat else ""
1612
+ hl_keywords_str = ", ".join(hl_keywords_flat) if hl_keywords_flat else ""
1613
 
1614
+ keywords = [ll_keywords_str, hl_keywords_str]
1615
 
1616
+ logger.info("Using %s mode for query processing", query_param.mode)
 
 
 
1617
 
1618
+ # ---------------------------
1619
+ # 3) BUILD CONTEXT
1620
+ # ---------------------------
1621
+ context = await _build_query_context(
1622
+ keywords,
1623
+ knowledge_graph_inst,
1624
+ entities_vdb,
1625
+ relationships_vdb,
1626
+ text_chunks_db,
1627
+ query_param,
1628
  )
1629
+ if not context:
 
 
1630
  return PROMPTS["fail_response"]
1631
 
1632
+ # If only context is needed, return it
1633
  if query_param.only_need_context:
1634
+ return context
1635
 
1636
+ # ---------------------------
1637
+ # 4) BUILD THE SYSTEM PROMPT + CALL LLM
1638
+ # ---------------------------
1639
+
1640
+ # Process conversation history
1641
+ history_context = ""
1642
+ if query_param.conversation_history:
1643
+ history_context = get_conversation_turns(
1644
+ query_param.conversation_history, query_param.history_turns
1645
+ )
1646
+
1647
+ sys_prompt_temp = PROMPTS["rag_response"]
1648
+ sys_prompt = sys_prompt_temp.format(
1649
+ context_data=context,
1650
  response_type=query_param.response_type,
1651
+ history=history_context,
1652
  )
1653
 
1654
  if query_param.only_need_prompt:
1655
  return sys_prompt
1656
 
 
1657
  response = await use_model_func(
1658
  query,
1659
  system_prompt=sys_prompt,
1660
  stream=query_param.stream,
1661
  )
 
1662
  if isinstance(response, str) and len(response) > len(sys_prompt):
1663
  response = (
1664
  response.replace(sys_prompt, "")
 
1670
  .strip()
1671
  )
1672
 
1673
+ # Save to cache
1674
  await save_to_cache(
1675
  hashing_kv,
1676
  CacheData(
 
1680
  quantized=quantized,
1681
  min_val=min_val,
1682
  max_val=max_val,
1683
+ mode=query_param.mode,
1684
+ cache_type="query",
1685
  ),
1686
  )
 
1687
  return response
lightrag/prompt.py CHANGED
@@ -58,7 +58,7 @@ Entity_types: [person, technology, mission, organization, location]
58
  Text:
59
  while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
60
 
61
- Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us.”
62
 
63
  The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
64
 
@@ -160,7 +160,7 @@ You are a helpful assistant responding to questions about data in the tables pro
160
 
161
  ---Goal---
162
 
163
- Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
164
  If you don't know the answer, just say so. Do not make anything up.
165
  Do not include information where the supporting evidence for it is not provided.
166
 
@@ -170,6 +170,9 @@ When handling relationships with timestamps:
170
  3. Don't automatically prefer the most recently created relationships - use judgment based on the context
171
  4. For time-specific queries, prioritize temporal information in the content before considering creation timestamps
172
 
 
 
 
173
  ---Target response length and format---
174
 
175
  {response_type}
@@ -178,22 +181,23 @@ When handling relationships with timestamps:
178
 
179
  {context_data}
180
 
181
- Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown."""
182
 
183
  PROMPTS["keywords_extraction"] = """---Role---
184
 
185
- You are a helpful assistant tasked with identifying both high-level and low-level keywords in the user's query.
186
 
187
  ---Goal---
188
 
189
- Given the query, list both high-level and low-level keywords. High-level keywords focus on overarching concepts or themes, while low-level keywords focus on specific entities, details, or concrete terms.
190
 
191
  ---Instructions---
192
 
193
- - Output the keywords in JSON format.
 
194
  - The JSON should have two keys:
195
- - "high_level_keywords" for overarching concepts or themes.
196
- - "low_level_keywords" for specific entities or details.
197
 
198
  ######################
199
  -Examples-
@@ -203,7 +207,10 @@ Given the query, list both high-level and low-level keywords. High-level keyword
203
  #############################
204
  -Real Data-
205
  ######################
206
- Query: {query}
 
 
 
207
  ######################
208
  The `Output` should be human text, not unicode characters. Keep the same language as `Query`.
209
  Output:
@@ -248,10 +255,9 @@ PROMPTS["naive_rag_response"] = """---Role---
248
 
249
  You are a helpful assistant responding to questions about documents provided.
250
 
251
-
252
  ---Goal---
253
 
254
- Generate a response of the target length and format that responds to the user's question, summarizing all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
255
  If you don't know the answer, just say so. Do not make anything up.
256
  Do not include information where the supporting evidence for it is not provided.
257
 
@@ -261,6 +267,9 @@ When handling content with timestamps:
261
  3. Don't automatically prefer the most recent content - use judgment based on the context
262
  4. For time-specific queries, prioritize temporal information in the content before considering creation timestamps
263
 
 
 
 
264
  ---Target response length and format---
265
 
266
  {response_type}
@@ -269,8 +278,7 @@ When handling content with timestamps:
269
 
270
  {content_data}
271
 
272
- Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown.
273
- """
274
 
275
  PROMPTS[
276
  "similarity_check"
@@ -302,7 +310,7 @@ You are a professional assistant responsible for answering questions based on kn
302
 
303
  ---Goal---
304
 
305
- Generate a concise response that summarizes relevant points from the provided information. If you don't know the answer, just say so. Do not make anything up or include information where the supporting evidence is not provided.
306
 
307
  When handling information with timestamps:
308
  1. Each piece of information (both relationships and content) has a "created_at" timestamp indicating when we acquired this knowledge
@@ -310,6 +318,9 @@ When handling information with timestamps:
310
  3. Don't automatically prefer the most recent information - use judgment based on the context
311
  4. For time-specific queries, prioritize temporal information in the content before considering creation timestamps
312
 
 
 
 
313
  ---Data Sources---
314
 
315
  1. Knowledge Graph Data:
@@ -326,6 +337,7 @@ When handling information with timestamps:
326
  - Each paragraph should be under a relevant section heading
327
  - Each section should focus on one main point or aspect of the answer
328
  - Use clear and descriptive section titles that reflect the content
 
329
  - List up to 5 most important reference sources at the end under "References", clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (VD)
330
  Format: [KG/VD] Source content
331
 
 
58
  Text:
59
  while Alex clenched his jaw, the buzz of frustration dull against the backdrop of Taylor's authoritarian certainty. It was this competitive undercurrent that kept him alert, the sense that his and Jordan's shared commitment to discovery was an unspoken rebellion against Cruz's narrowing vision of control and order.
60
 
61
+ Then Taylor did something unexpected. They paused beside Jordan and, for a moment, observed the device with something akin to reverence. "If this tech can be understood..." Taylor said, their voice quieter, "It could change the game for us. For all of us."
62
 
63
  The underlying dismissal earlier seemed to falter, replaced by a glimpse of reluctant respect for the gravity of what lay in their hands. Jordan looked up, and for a fleeting heartbeat, their eyes locked with Taylor's, a wordless clash of wills softening into an uneasy truce.
64
 
 
160
 
161
  ---Goal---
162
 
163
+ Generate a response of the target length and format that responds to the user's question, considering both the conversation history and the current query. Summarize all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
164
  If you don't know the answer, just say so. Do not make anything up.
165
  Do not include information where the supporting evidence for it is not provided.
166
 
 
170
  3. Don't automatically prefer the most recently created relationships - use judgment based on the context
171
  4. For time-specific queries, prioritize temporal information in the content before considering creation timestamps
172
 
173
+ ---Conversation History---
174
+ {history}
175
+
176
  ---Target response length and format---
177
 
178
  {response_type}
 
181
 
182
  {context_data}
183
 
184
+ Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown. Ensure the response maintains continuity with the conversation history."""
185
 
186
  PROMPTS["keywords_extraction"] = """---Role---
187
 
188
+ You are a helpful assistant tasked with identifying both high-level and low-level keywords in the user's query and conversation history.
189
 
190
  ---Goal---
191
 
192
+ Given the query and conversation history, list both high-level and low-level keywords. High-level keywords focus on overarching concepts or themes, while low-level keywords focus on specific entities, details, or concrete terms.
193
 
194
  ---Instructions---
195
 
196
+ - Consider both the current query and relevant conversation history when extracting keywords
197
+ - Output the keywords in JSON format
198
  - The JSON should have two keys:
199
+ - "high_level_keywords" for overarching concepts or themes
200
+ - "low_level_keywords" for specific entities or details
201
 
202
  ######################
203
  -Examples-
 
207
  #############################
208
  -Real Data-
209
  ######################
210
+ Conversation History:
211
+ {history}
212
+
213
+ Current Query: {query}
214
  ######################
215
  The `Output` should be human text, not unicode characters. Keep the same language as `Query`.
216
  Output:
 
255
 
256
  You are a helpful assistant responding to questions about documents provided.
257
 
 
258
  ---Goal---
259
 
260
+ Generate a response of the target length and format that responds to the user's question, considering both the conversation history and the current query. Summarize all information in the input data tables appropriate for the response length and format, and incorporating any relevant general knowledge.
261
  If you don't know the answer, just say so. Do not make anything up.
262
  Do not include information where the supporting evidence for it is not provided.
263
 
 
267
  3. Don't automatically prefer the most recent content - use judgment based on the context
268
  4. For time-specific queries, prioritize temporal information in the content before considering creation timestamps
269
 
270
+ ---Conversation History---
271
+ {history}
272
+
273
  ---Target response length and format---
274
 
275
  {response_type}
 
278
 
279
  {content_data}
280
 
281
+ Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown. Ensure the response maintains continuity with the conversation history."""
 
282
 
283
  PROMPTS[
284
  "similarity_check"
 
310
 
311
  ---Goal---
312
 
313
+ Generate a concise response that summarizes relevant points from the provided information, considering both the current query and conversation history. If you don't know the answer, just say so. Do not make anything up or include information where the supporting evidence is not provided.
314
 
315
  When handling information with timestamps:
316
  1. Each piece of information (both relationships and content) has a "created_at" timestamp indicating when we acquired this knowledge
 
318
  3. Don't automatically prefer the most recent information - use judgment based on the context
319
  4. For time-specific queries, prioritize temporal information in the content before considering creation timestamps
320
 
321
+ ---Conversation History---
322
+ {history}
323
+
324
  ---Data Sources---
325
 
326
  1. Knowledge Graph Data:
 
337
  - Each paragraph should be under a relevant section heading
338
  - Each section should focus on one main point or aspect of the answer
339
  - Use clear and descriptive section titles that reflect the content
340
+ - Ensure the response maintains continuity with the conversation history
341
  - List up to 5 most important reference sources at the end under "References", clearly indicating whether each source is from Knowledge Graph (KG) or Vector Data (VD)
342
  Format: [KG/VD] Source content
343
 
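The new `{history}` placeholders above are filled from `get_conversation_turns` (see the utils.py change below). A minimal sketch of that formatting path, with illustrative sample values and assuming the module paths used in this commit:

```python
from lightrag.prompt import PROMPTS
from lightrag.utils import get_conversation_turns

conversation_history = [
    {"role": "user", "content": "What is the main character's attitude towards Christmas?"},
    {"role": "assistant", "content": "Scrooge dismisses Christmas as humbug at the start."},
]
history_context = get_conversation_turns(conversation_history, num_turns=3)

# Same formatting call that operate.py now makes for the RAG response prompt
sys_prompt = PROMPTS["rag_response"].format(
    context_data="<retrieved context tables>",  # placeholder for the built context
    response_type="Multiple Paragraphs",        # sample response_type value
    history=history_context,
)
```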
lightrag/utils.py CHANGED
@@ -108,8 +108,23 @@ def convert_response_to_json(response: str) -> dict:
108
  raise e from None
109
 
110
 
111
- def compute_args_hash(*args):
112
- return md5(str(args).encode()).hexdigest()
113
 
114
 
115
  def compute_mdhash_id(content, prefix: str = ""):
@@ -343,8 +358,8 @@ async def get_best_cached_response(
343
  use_llm_check=False,
344
  llm_func=None,
345
  original_prompt=None,
 
346
  ) -> Union[str, None]:
347
- # Get mode-specific cache
348
  mode_cache = await hashing_kv.get_by_id(mode)
349
  if not mode_cache:
350
  return None
@@ -356,6 +371,10 @@ async def get_best_cached_response(
356
 
357
  # Only iterate through cache entries for this mode
358
  for cache_id, cache_data in mode_cache.items():
 
 
 
 
359
  if cache_data["embedding"] is None:
360
  continue
361
 
@@ -452,13 +471,12 @@ def dequantize_embedding(
452
  return (quantized * scale + min_val).astype(np.float32)
453
 
454
 
455
- async def handle_cache(hashing_kv, args_hash, prompt, mode="default"):
456
  """Generic cache handling function"""
457
  if hashing_kv is None or not hashing_kv.global_config.get("enable_llm_cache"):
458
  return None, None, None, None
459
 
460
- # For naive mode, only use simple cache matching
461
- # if mode == "naive":
462
  if mode == "default":
463
  if exists_func(hashing_kv, "get_by_mode_and_id"):
464
  mode_cache = await hashing_kv.get_by_mode_and_id(mode, args_hash) or {}
@@ -492,6 +510,7 @@ async def handle_cache(hashing_kv, args_hash, prompt, mode="default"):
492
  use_llm_check=use_llm_check,
493
  llm_func=llm_model_func if use_llm_check else None,
494
  original_prompt=prompt if use_llm_check else None,
 
495
  )
496
  if best_cached_response is not None:
497
  return best_cached_response, None, None, None
@@ -573,3 +592,59 @@ def exists_func(obj, func_name: str) -> bool:
573
  return True
574
  else:
575
  return False
 
108
  raise e from None
109
 
110
 
111
+ def compute_args_hash(*args, cache_type: str = None) -> str:
112
+ """Compute a hash for the given arguments.
113
+ Args:
114
+ *args: Arguments to hash
115
+ cache_type: Type of cache (e.g., 'keywords', 'query')
116
+ Returns:
117
+ str: Hash string
118
+ """
119
+ import hashlib
120
+
121
+ # Convert all arguments to strings and join them
122
+ args_str = "".join([str(arg) for arg in args])
123
+ if cache_type:
124
+ args_str = f"{cache_type}:{args_str}"
125
+
126
+ # Compute MD5 hash
127
+ return hashlib.md5(args_str.encode()).hexdigest()
128
 
129
 
130
  def compute_mdhash_id(content, prefix: str = ""):
 
358
  use_llm_check=False,
359
  llm_func=None,
360
  original_prompt=None,
361
+ cache_type=None,
362
  ) -> Union[str, None]:
 
363
  mode_cache = await hashing_kv.get_by_id(mode)
364
  if not mode_cache:
365
  return None
 
371
 
372
  # Only iterate through cache entries for this mode
373
  for cache_id, cache_data in mode_cache.items():
374
+ # Skip if cache_type doesn't match
375
+ if cache_type and cache_data.get("cache_type") != cache_type:
376
+ continue
377
+
378
  if cache_data["embedding"] is None:
379
  continue
380
 
 
471
  return (quantized * scale + min_val).astype(np.float32)
472
 
473
 
474
+ async def handle_cache(hashing_kv, args_hash, prompt, mode="default", cache_type=None):
475
  """Generic cache handling function"""
476
  if hashing_kv is None or not hashing_kv.global_config.get("enable_llm_cache"):
477
  return None, None, None, None
478
 
479
+ # For default mode, only use simple cache matching
 
480
  if mode == "default":
481
  if exists_func(hashing_kv, "get_by_mode_and_id"):
482
  mode_cache = await hashing_kv.get_by_mode_and_id(mode, args_hash) or {}
 
510
  use_llm_check=use_llm_check,
511
  llm_func=llm_model_func if use_llm_check else None,
512
  original_prompt=prompt if use_llm_check else None,
513
+ cache_type=cache_type,
514
  )
515
  if best_cached_response is not None:
516
  return best_cached_response, None, None, None
 
592
  return True
593
  else:
594
  return False
595
+
596
+
597
+ def get_conversation_turns(conversation_history: list[dict], num_turns: int) -> str:
598
+ """
599
+ Process conversation history to get the specified number of complete turns.
600
+
601
+ Args:
602
+ conversation_history: List of conversation messages in chronological order
603
+ num_turns: Number of complete turns to include
604
+
605
+ Returns:
606
+ Formatted string of the conversation history
607
+ """
608
+ # Group messages into turns
609
+ turns = []
610
+ messages = []
611
+
612
+ # First, filter out keyword extraction messages
613
+ for msg in conversation_history:
614
+ if msg["role"] == "assistant" and (
615
+ msg["content"].startswith('{ "high_level_keywords"')
616
+ or msg["content"].startswith("{'high_level_keywords'")
617
+ ):
618
+ continue
619
+ messages.append(msg)
620
+
621
+ # Then process messages in chronological order
622
+ i = 0
623
+ while i < len(messages) - 1:
624
+ msg1 = messages[i]
625
+ msg2 = messages[i + 1]
626
+
627
+ # Check if we have a user-assistant or assistant-user pair
628
+ if (msg1["role"] == "user" and msg2["role"] == "assistant") or (
629
+ msg1["role"] == "assistant" and msg2["role"] == "user"
630
+ ):
631
+ # Always put user message first in the turn
632
+ if msg1["role"] == "assistant":
633
+ turn = [msg2, msg1] # user, assistant
634
+ else:
635
+ turn = [msg1, msg2] # user, assistant
636
+ turns.append(turn)
637
+ i += 1
638
+
639
+ # Keep only the most recent num_turns
640
+ if len(turns) > num_turns:
641
+ turns = turns[-num_turns:]
642
+
643
+ # Format the turns into a string
644
+ formatted_turns = []
645
+ for turn in turns:
646
+ formatted_turns.extend(
647
+ [f"user: {turn[0]['content']}", f"assistant: {turn[1]['content']}"]
648
+ )
649
+
650
+ return "\n".join(formatted_turns)
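
A quick check of what the new helper returns for a short history (the sample messages are made up for illustration; note that assistant replies that look like keyword-extraction JSON are filtered out before turns are built):

```python
history = [
    {"role": "user", "content": "What is the main character's attitude towards Christmas?"},
    # Keyword-extraction output is skipped by the filter at the top of the function
    {"role": "assistant", "content": '{ "high_level_keywords": ["attitude"], "low_level_keywords": ["Scrooge", "Christmas"]}'},
    {"role": "assistant", "content": "Scrooge dismisses Christmas as humbug at the start of the story."},
]

print(get_conversation_turns(history, num_turns=3))
# user: What is the main character's attitude towards Christmas?
# assistant: Scrooge dismisses Christmas as humbug at the start of the story.
```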