jin commited on
Commit
98478af
·
1 Parent(s): ac47ddf

Logic Optimization

Browse files
.gitignore CHANGED
@@ -13,4 +13,4 @@ ignore_this.txt
13
  *.ignore.*
14
  .ruff_cache/
15
  gui/
16
- *.log
 
13
  *.ignore.*
14
  .ruff_cache/
15
  gui/
16
+ *.log
examples/lightrag_api_oracle_demo..py CHANGED
@@ -1,16 +1,14 @@
1
-
2
  from fastapi import FastAPI, HTTPException, File, UploadFile
3
  from fastapi import Query
4
  from contextlib import asynccontextmanager
5
  from pydantic import BaseModel
6
- from typing import Optional,Any
7
- from fastapi.responses import JSONResponse
 
 
 
8
 
9
- import sys, os
10
- print(os.getcwd())
11
  from pathlib import Path
12
- script_directory = Path(__file__).resolve().parent.parent
13
- sys.path.append(os.path.abspath(script_directory))
14
 
15
  import asyncio
16
  import nest_asyncio
@@ -18,10 +16,12 @@ from lightrag import LightRAG, QueryParam
18
  from lightrag.llm import openai_complete_if_cache, openai_embedding
19
  from lightrag.utils import EmbeddingFunc
20
  import numpy as np
21
- from datetime import datetime
22
 
23
  from lightrag.kg.oracle_impl import OracleDB
24
 
 
 
 
25
 
26
 
27
  # Apply nest_asyncio to solve event loop issues
@@ -47,7 +47,8 @@ print(f"EMBEDDING_MAX_TOKEN_SIZE: {EMBEDDING_MAX_TOKEN_SIZE}")
47
 
48
  if not os.path.exists(WORKING_DIR):
49
  os.mkdir(WORKING_DIR)
50
-
 
51
  async def llm_model_func(
52
  prompt, system_prompt=None, history_messages=[], **kwargs
53
  ) -> str:
@@ -77,10 +78,10 @@ async def get_embedding_dim():
77
  embedding_dim = embedding.shape[1]
78
  return embedding_dim
79
 
 
80
  async def init():
81
-
82
  # Detect embedding dimension
83
- embedding_dimension = 1024 #await get_embedding_dim()
84
  print(f"Detected embedding dimension: {embedding_dimension}")
85
  # Create Oracle DB connection
86
  # The `config` parameter is the connection configuration of Oracle DB
@@ -88,36 +89,36 @@ async def init():
88
  # We storage data in unified tables, so we need to set a `workspace` parameter to specify which docs we want to store and query
89
  # Below is an example of how to connect to Oracle Autonomous Database on Oracle Cloud
90
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- oracle_db = OracleDB(config={
93
- "user":"",
94
- "password":"",
95
- "dsn":"",
96
- "config_dir":"path_to_config_dir",
97
- "wallet_location":"path_to_wallet_location",
98
- "wallet_password":"wallet_password",
99
- "workspace":"company"
100
- } # specify which docs you want to store and query
101
- )
102
-
103
  # Check if Oracle DB tables exist, if not, tables will be created
104
  await oracle_db.check_tables()
105
  # Initialize LightRAG
106
- # We use Oracle DB as the KV/vector/graph storage
107
  rag = LightRAG(
108
- enable_llm_cache=False,
109
- working_dir=WORKING_DIR,
110
- chunk_token_size=512,
111
- llm_model_func=llm_model_func,
112
- embedding_func=EmbeddingFunc(
113
- embedding_dim=embedding_dimension,
114
- max_token_size=512,
115
- func=embedding_func,
116
- ),
117
- graph_storage = "OracleGraphStorage",
118
- kv_storage="OracleKVStorage",
119
- vector_storage="OracleVectorDBStorage"
120
- )
121
 
122
  # Setthe KV/vector/graph storage's `db` property, so all operation will use same connection pool
123
  rag.graph_storage_cls.db = oracle_db
@@ -128,7 +129,7 @@ async def init():
128
 
129
 
130
  # Extract and Insert into LightRAG storage
131
- #with open("./dickens/book.txt", "r", encoding="utf-8") as f:
132
  # await rag.ainsert(f.read())
133
 
134
  # # Perform search in different modes
@@ -147,9 +148,11 @@ class QueryRequest(BaseModel):
147
  only_need_context: bool = False
148
  only_need_prompt: bool = False
149
 
 
150
  class DataRequest(BaseModel):
151
  limit: int = 100
152
 
 
153
  class InsertRequest(BaseModel):
154
  text: str
155
 
@@ -164,6 +167,7 @@ class Response(BaseModel):
164
 
165
  rag = None
166
 
 
167
  @asynccontextmanager
168
  async def lifespan(app: FastAPI):
169
  global rag
@@ -172,25 +176,28 @@ async def lifespan(app: FastAPI):
172
  yield
173
 
174
 
175
- app = FastAPI(title="LightRAG API", description="API for RAG operations",lifespan=lifespan)
 
 
 
176
 
177
  @app.post("/query", response_model=Response)
178
  async def query_endpoint(request: QueryRequest):
179
- #try:
180
- # loop = asyncio.get_event_loop()
181
  if request.mode == "naive":
182
  top_k = 3
183
  else:
184
  top_k = 60
185
  result = await rag.aquery(
186
- request.query,
187
- param=QueryParam(
188
- mode=request.mode,
189
- only_need_context=request.only_need_context,
190
- only_need_prompt=request.only_need_prompt,
191
- top_k=top_k
192
- ),
193
- )
194
  return Response(status="success", data=result)
195
  # except Exception as e:
196
  # raise HTTPException(status_code=500, detail=str(e))
@@ -199,9 +206,9 @@ async def query_endpoint(request: QueryRequest):
199
  @app.get("/data", response_model=Response)
200
  async def query_all_nodes(type: str = Query("nodes"), limit: int = Query(100)):
201
  if type == "nodes":
202
- result = await rag.chunk_entity_relation_graph.get_all_nodes(limit = limit)
203
  elif type == "edges":
204
- result = await rag.chunk_entity_relation_graph.get_all_edges(limit = limit)
205
  elif type == "statistics":
206
  result = await rag.chunk_entity_relation_graph.get_statistics()
207
  return Response(status="success", data=result)
@@ -264,4 +271,4 @@ if __name__ == "__main__":
264
  # curl -X POST "http://127.0.0.1:8020/insert_file" -H "Content-Type: application/json" -d '{"file_path": "path/to/your/file.txt"}'
265
 
266
  # 4. Health check:
267
- # curl -X GET "http://127.0.0.1:8020/health"
 
 
1
  from fastapi import FastAPI, HTTPException, File, UploadFile
2
  from fastapi import Query
3
  from contextlib import asynccontextmanager
4
  from pydantic import BaseModel
5
+ from typing import Optional, Any
6
+
7
+ import sys
8
+ import os
9
+
10
 
 
 
11
  from pathlib import Path
 
 
12
 
13
  import asyncio
14
  import nest_asyncio
 
16
  from lightrag.llm import openai_complete_if_cache, openai_embedding
17
  from lightrag.utils import EmbeddingFunc
18
  import numpy as np
 
19
 
20
  from lightrag.kg.oracle_impl import OracleDB
21
 
22
+ print(os.getcwd())
23
+ script_directory = Path(__file__).resolve().parent.parent
24
+ sys.path.append(os.path.abspath(script_directory))
25
 
26
 
27
  # Apply nest_asyncio to solve event loop issues
 
47
 
48
  if not os.path.exists(WORKING_DIR):
49
  os.mkdir(WORKING_DIR)
50
+
51
+
52
  async def llm_model_func(
53
  prompt, system_prompt=None, history_messages=[], **kwargs
54
  ) -> str:
 
78
  embedding_dim = embedding.shape[1]
79
  return embedding_dim
80
 
81
+
82
  async def init():
 
83
  # Detect embedding dimension
84
+ embedding_dimension = 1024 # await get_embedding_dim()
85
  print(f"Detected embedding dimension: {embedding_dimension}")
86
  # Create Oracle DB connection
87
  # The `config` parameter is the connection configuration of Oracle DB
 
89
  # We storage data in unified tables, so we need to set a `workspace` parameter to specify which docs we want to store and query
90
  # Below is an example of how to connect to Oracle Autonomous Database on Oracle Cloud
91
 
92
+ oracle_db = OracleDB(
93
+ config={
94
+ "user": "",
95
+ "password": "",
96
+ "dsn": "",
97
+ "config_dir": "path_to_config_dir",
98
+ "wallet_location": "path_to_wallet_location",
99
+ "wallet_password": "wallet_password",
100
+ "workspace": "company",
101
+ } # specify which docs you want to store and query
102
+ )
103
 
 
 
 
 
 
 
 
 
 
 
 
104
  # Check if Oracle DB tables exist, if not, tables will be created
105
  await oracle_db.check_tables()
106
  # Initialize LightRAG
107
+ # We use Oracle DB as the KV/vector/graph storage
108
  rag = LightRAG(
109
+ enable_llm_cache=False,
110
+ working_dir=WORKING_DIR,
111
+ chunk_token_size=512,
112
+ llm_model_func=llm_model_func,
113
+ embedding_func=EmbeddingFunc(
114
+ embedding_dim=embedding_dimension,
115
+ max_token_size=512,
116
+ func=embedding_func,
117
+ ),
118
+ graph_storage="OracleGraphStorage",
119
+ kv_storage="OracleKVStorage",
120
+ vector_storage="OracleVectorDBStorage",
121
+ )
122
 
123
  # Setthe KV/vector/graph storage's `db` property, so all operation will use same connection pool
124
  rag.graph_storage_cls.db = oracle_db
 
129
 
130
 
131
  # Extract and Insert into LightRAG storage
132
+ # with open("./dickens/book.txt", "r", encoding="utf-8") as f:
133
  # await rag.ainsert(f.read())
134
 
135
  # # Perform search in different modes
 
148
  only_need_context: bool = False
149
  only_need_prompt: bool = False
150
 
151
+
152
  class DataRequest(BaseModel):
153
  limit: int = 100
154
 
155
+
156
  class InsertRequest(BaseModel):
157
  text: str
158
 
 
167
 
168
  rag = None
169
 
170
+
171
  @asynccontextmanager
172
  async def lifespan(app: FastAPI):
173
  global rag
 
176
  yield
177
 
178
 
179
+ app = FastAPI(
180
+ title="LightRAG API", description="API for RAG operations", lifespan=lifespan
181
+ )
182
+
183
 
184
  @app.post("/query", response_model=Response)
185
  async def query_endpoint(request: QueryRequest):
186
+ # try:
187
+ # loop = asyncio.get_event_loop()
188
  if request.mode == "naive":
189
  top_k = 3
190
  else:
191
  top_k = 60
192
  result = await rag.aquery(
193
+ request.query,
194
+ param=QueryParam(
195
+ mode=request.mode,
196
+ only_need_context=request.only_need_context,
197
+ only_need_prompt=request.only_need_prompt,
198
+ top_k=top_k,
199
+ ),
200
+ )
201
  return Response(status="success", data=result)
202
  # except Exception as e:
203
  # raise HTTPException(status_code=500, detail=str(e))
 
206
  @app.get("/data", response_model=Response)
207
  async def query_all_nodes(type: str = Query("nodes"), limit: int = Query(100)):
208
  if type == "nodes":
209
+ result = await rag.chunk_entity_relation_graph.get_all_nodes(limit=limit)
210
  elif type == "edges":
211
+ result = await rag.chunk_entity_relation_graph.get_all_edges(limit=limit)
212
  elif type == "statistics":
213
  result = await rag.chunk_entity_relation_graph.get_statistics()
214
  return Response(status="success", data=result)
 
271
  # curl -X POST "http://127.0.0.1:8020/insert_file" -H "Content-Type: application/json" -d '{"file_path": "path/to/your/file.txt"}'
272
 
273
  # 4. Health check:
274
+ # curl -X GET "http://127.0.0.1:8020/health"
examples/lightrag_oracle_demo.py CHANGED
@@ -97,8 +97,7 @@ async def main():
97
  graph_storage="OracleGraphStorage",
98
  kv_storage="OracleKVStorage",
99
  vector_storage="OracleVectorDBStorage",
100
-
101
- addon_params = {"example_number":1, "language":"Simplfied Chinese"},
102
  )
103
 
104
  # Setthe KV/vector/graph storage's `db` property, so all operation will use same connection pool
 
97
  graph_storage="OracleGraphStorage",
98
  kv_storage="OracleKVStorage",
99
  vector_storage="OracleVectorDBStorage",
100
+ addon_params={"example_number": 1, "language": "Simplfied Chinese"},
 
101
  )
102
 
103
  # Setthe KV/vector/graph storage's `db` property, so all operation will use same connection pool
lightrag/kg/oracle_impl.py CHANGED
@@ -114,7 +114,9 @@ class OracleDB:
114
 
115
  logger.info("Finished check all tables in Oracle database")
116
 
117
- async def query(self, sql: str, multirows: bool = False) -> Union[dict, None]:
 
 
118
  async with self.pool.acquire() as connection:
119
  connection.inputtypehandler = self.input_type_handler
120
  connection.outputtypehandler = self.output_type_handler
@@ -256,7 +258,7 @@ class OracleKVStorage(BaseKVStorage):
256
  item["__vector__"],
257
  ]
258
  # print(merge_sql)
259
- await self.db.execute(merge_sql, data)
260
 
261
  if self.namespace == "full_docs":
262
  for k, v in self._data.items():
@@ -266,7 +268,7 @@ class OracleKVStorage(BaseKVStorage):
266
  )
267
  values = [k, self._data[k]["content"], self.db.workspace]
268
  # print(merge_sql)
269
- await self.db.execute(merge_sql, data)
270
  return left_data
271
 
272
  async def index_done_callback(self):
 
114
 
115
  logger.info("Finished check all tables in Oracle database")
116
 
117
+ async def query(
118
+ self, sql: str, params: dict = None, multirows: bool = False
119
+ ) -> Union[dict, None]:
120
  async with self.pool.acquire() as connection:
121
  connection.inputtypehandler = self.input_type_handler
122
  connection.outputtypehandler = self.output_type_handler
 
258
  item["__vector__"],
259
  ]
260
  # print(merge_sql)
261
+ await self.db.execute(merge_sql, values)
262
 
263
  if self.namespace == "full_docs":
264
  for k, v in self._data.items():
 
268
  )
269
  values = [k, self._data[k]["content"], self.db.workspace]
270
  # print(merge_sql)
271
+ await self.db.execute(merge_sql, values)
272
  return left_data
273
 
274
  async def index_done_callback(self):
lightrag/llm.py CHANGED
@@ -70,8 +70,8 @@ async def openai_complete_if_cache(
70
  model=model, messages=messages, **kwargs
71
  )
72
  content = response.choices[0].message.content
73
- if r'\u' in content:
74
- content = content.encode('utf-8').decode('unicode_escape')
75
  print(content)
76
  if hashing_kv is not None:
77
  await hashing_kv.upsert(
@@ -542,7 +542,7 @@ async def openai_embedding(
542
  texts: list[str],
543
  model: str = "text-embedding-3-small",
544
  base_url: str = None,
545
- api_key: str = None
546
  ) -> np.ndarray:
547
  if api_key:
548
  os.environ["OPENAI_API_KEY"] = api_key
@@ -551,7 +551,7 @@ async def openai_embedding(
551
  AsyncOpenAI() if base_url is None else AsyncOpenAI(base_url=base_url)
552
  )
553
  response = await openai_async_client.embeddings.create(
554
- model=model, input=texts, encoding_format="float"
555
  )
556
  return np.array([dp.embedding for dp in response.data])
557
 
 
70
  model=model, messages=messages, **kwargs
71
  )
72
  content = response.choices[0].message.content
73
+ if r"\u" in content:
74
+ content = content.encode("utf-8").decode("unicode_escape")
75
  print(content)
76
  if hashing_kv is not None:
77
  await hashing_kv.upsert(
 
542
  texts: list[str],
543
  model: str = "text-embedding-3-small",
544
  base_url: str = None,
545
+ api_key: str = None,
546
  ) -> np.ndarray:
547
  if api_key:
548
  os.environ["OPENAI_API_KEY"] = api_key
 
551
  AsyncOpenAI() if base_url is None else AsyncOpenAI(base_url=base_url)
552
  )
553
  response = await openai_async_client.embeddings.create(
554
+ model=model, input=texts, encoding_format="float"
555
  )
556
  return np.array([dp.embedding for dp in response.data])
557
 
lightrag/operate.py CHANGED
@@ -249,13 +249,17 @@ async def extract_entities(
249
 
250
  ordered_chunks = list(chunks.items())
251
  # add language and example number params to prompt
252
- language = global_config["addon_params"].get("language",PROMPTS["DEFAULT_LANGUAGE"])
 
 
253
  example_number = global_config["addon_params"].get("example_number", None)
254
- if example_number and example_number<len(PROMPTS["entity_extraction_examples"]):
255
- examples="\n".join(PROMPTS["entity_extraction_examples"][:int(example_number)])
 
 
256
  else:
257
- examples="\n".join(PROMPTS["entity_extraction_examples"])
258
-
259
  entity_extract_prompt = PROMPTS["entity_extraction"]
260
  context_base = dict(
261
  tuple_delimiter=PROMPTS["DEFAULT_TUPLE_DELIMITER"],
@@ -263,8 +267,9 @@ async def extract_entities(
263
  completion_delimiter=PROMPTS["DEFAULT_COMPLETION_DELIMITER"],
264
  entity_types=",".join(PROMPTS["DEFAULT_ENTITY_TYPES"]),
265
  examples=examples,
266
- language=language)
267
-
 
268
  continue_prompt = PROMPTS["entiti_continue_extraction"]
269
  if_loop_prompt = PROMPTS["entiti_if_loop_extraction"]
270
 
@@ -396,6 +401,7 @@ async def extract_entities(
396
 
397
  return knowledge_graph_inst
398
 
 
399
  async def kg_query(
400
  query,
401
  knowledge_graph_inst: BaseGraphStorage,
@@ -408,59 +414,61 @@ async def kg_query(
408
  context = None
409
  example_number = global_config["addon_params"].get("example_number", None)
410
  if example_number and example_number < len(PROMPTS["keywords_extraction_examples"]):
411
- examples = "\n".join(PROMPTS["keywords_extraction_examples"][:int(example_number)])
 
 
412
  else:
413
- examples="\n".join(PROMPTS["keywords_extraction_examples"])
414
-
415
  # Set mode
416
  if query_param.mode not in ["local", "global", "hybrid"]:
417
  logger.error(f"Unknown mode {query_param.mode} in kg_query")
418
  return PROMPTS["fail_response"]
419
-
420
  # LLM generate keywords
421
  use_model_func = global_config["llm_model_func"]
422
  kw_prompt_temp = PROMPTS["keywords_extraction"]
423
- kw_prompt = kw_prompt_temp.format(query=query,examples=examples)
424
- result = await use_model_func(kw_prompt)
425
- logger.info(f"kw_prompt result:")
426
  print(result)
427
  try:
428
  json_text = locate_json_string_body_from_string(result)
429
  keywords_data = json.loads(json_text)
430
  hl_keywords = keywords_data.get("high_level_keywords", [])
431
  ll_keywords = keywords_data.get("low_level_keywords", [])
432
-
433
  # Handle parsing error
434
  except json.JSONDecodeError as e:
435
  print(f"JSON parsing error: {e} {result}")
436
  return PROMPTS["fail_response"]
437
-
438
  # Handdle keywords missing
439
  if hl_keywords == [] and ll_keywords == []:
440
  logger.warning("low_level_keywords and high_level_keywords is empty")
441
- return PROMPTS["fail_response"]
442
- if ll_keywords == [] and query_param.mode in ["local","hybrid"]:
443
  logger.warning("low_level_keywords is empty")
444
  return PROMPTS["fail_response"]
445
  else:
446
  ll_keywords = ", ".join(ll_keywords)
447
- if hl_keywords == [] and query_param.mode in ["global","hybrid"]:
448
  logger.warning("high_level_keywords is empty")
449
  return PROMPTS["fail_response"]
450
  else:
451
  hl_keywords = ", ".join(hl_keywords)
452
-
453
  # Build context
454
- keywords = [ll_keywords, hl_keywords]
455
  context = await _build_query_context(
456
- keywords,
457
- knowledge_graph_inst,
458
- entities_vdb,
459
- relationships_vdb,
460
- text_chunks_db,
461
- query_param,
462
- )
463
-
464
  if query_param.only_need_context:
465
  return context
466
  if context is None:
@@ -468,13 +476,13 @@ async def kg_query(
468
  sys_prompt_temp = PROMPTS["rag_response"]
469
  sys_prompt = sys_prompt_temp.format(
470
  context_data=context, response_type=query_param.response_type
471
- )
472
  if query_param.only_need_prompt:
473
  return sys_prompt
474
  response = await use_model_func(
475
  query,
476
  system_prompt=sys_prompt,
477
- )
478
  if len(response) > len(sys_prompt):
479
  response = (
480
  response.replace(sys_prompt, "")
@@ -496,44 +504,72 @@ async def _build_query_context(
496
  relationships_vdb: BaseVectorStorage,
497
  text_chunks_db: BaseKVStorage[TextChunkSchema],
498
  query_param: QueryParam,
499
- ):
500
  ll_kewwords, hl_keywrds = query[0], query[1]
501
  if query_param.mode in ["local", "hybrid"]:
502
  if ll_kewwords == "":
503
- ll_entities_context,ll_relations_context,ll_text_units_context = "","",""
504
- warnings.warn("Low Level context is None. Return empty Low entity/relationship/source")
 
 
 
 
 
 
505
  query_param.mode = "global"
506
  else:
507
- ll_entities_context,ll_relations_context,ll_text_units_context = await _get_node_data(
 
 
 
 
508
  ll_kewwords,
509
  knowledge_graph_inst,
510
  entities_vdb,
511
  text_chunks_db,
512
- query_param
513
- )
514
  if query_param.mode in ["global", "hybrid"]:
515
  if hl_keywrds == "":
516
- hl_entities_context,hl_relations_context,hl_text_units_context = "","",""
517
- warnings.warn("High Level context is None. Return empty High entity/relationship/source")
 
 
 
 
 
 
518
  query_param.mode = "local"
519
  else:
520
- hl_entities_context,hl_relations_context,hl_text_units_context = await _get_edge_data(
 
 
 
 
521
  hl_keywrds,
522
  knowledge_graph_inst,
523
  relationships_vdb,
524
  text_chunks_db,
525
- query_param
526
- )
527
- if query_param.mode == 'hybrid':
528
- entities_context,relations_context,text_units_context = combine_contexts(
529
- [hl_entities_context,ll_entities_context],
530
- [hl_relations_context,ll_relations_context],
531
- [hl_text_units_context,ll_text_units_context]
532
- )
533
- elif query_param.mode == 'local':
534
- entities_context,relations_context,text_units_context = ll_entities_context,ll_relations_context,ll_text_units_context
535
- elif query_param.mode == 'global':
536
- entities_context,relations_context,text_units_context = hl_entities_context,hl_relations_context,hl_text_units_context
 
 
 
 
 
 
 
 
537
  return f"""
538
  # -----Entities-----
539
  # ```csv
@@ -550,7 +586,6 @@ async def _build_query_context(
550
  # """
551
 
552
 
553
-
554
  async def _get_node_data(
555
  query,
556
  knowledge_graph_inst: BaseGraphStorage,
@@ -568,7 +603,7 @@ async def _get_node_data(
568
  )
569
  if not all([n is not None for n in node_datas]):
570
  logger.warning("Some nodes are missing, maybe the storage is damaged")
571
-
572
  # 获取实体的度
573
  node_degrees = await asyncio.gather(
574
  *[knowledge_graph_inst.node_degree(r["entity_name"]) for r in results]
@@ -588,7 +623,7 @@ async def _get_node_data(
588
  )
589
  logger.info(
590
  f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} text units"
591
- )
592
 
593
  # 构建提示词
594
  entites_section_list = [["id", "entity", "type", "description", "rank"]]
@@ -625,7 +660,7 @@ async def _get_node_data(
625
  for i, t in enumerate(use_text_units):
626
  text_units_section_list.append([i, t["content"]])
627
  text_units_context = list_of_list_to_csv(text_units_section_list)
628
- return entities_context,relations_context,text_units_context
629
 
630
 
631
  async def _find_most_related_text_unit_from_entities(
@@ -821,8 +856,7 @@ async def _get_edge_data(
821
  for i, t in enumerate(use_text_units):
822
  text_units_section_list.append([i, t["content"]])
823
  text_units_context = list_of_list_to_csv(text_units_section_list)
824
- return entities_context,relations_context,text_units_context
825
-
826
 
827
 
828
  async def _find_most_related_entities_from_relationships(
@@ -902,7 +936,7 @@ async def _find_related_text_unit_from_relationships(
902
  def combine_contexts(entities, relationships, sources):
903
  # Function to extract entities, relationships, and sources from context strings
904
  hl_entities, ll_entities = entities[0], entities[1]
905
- hl_relationships, ll_relationships = relationships[0],relationships[1]
906
  hl_sources, ll_sources = sources[0], sources[1]
907
  # Combine and deduplicate the entities
908
  combined_entities = process_combine_contexts(hl_entities, ll_entities)
 
249
 
250
  ordered_chunks = list(chunks.items())
251
  # add language and example number params to prompt
252
+ language = global_config["addon_params"].get(
253
+ "language", PROMPTS["DEFAULT_LANGUAGE"]
254
+ )
255
  example_number = global_config["addon_params"].get("example_number", None)
256
+ if example_number and example_number < len(PROMPTS["entity_extraction_examples"]):
257
+ examples = "\n".join(
258
+ PROMPTS["entity_extraction_examples"][: int(example_number)]
259
+ )
260
  else:
261
+ examples = "\n".join(PROMPTS["entity_extraction_examples"])
262
+
263
  entity_extract_prompt = PROMPTS["entity_extraction"]
264
  context_base = dict(
265
  tuple_delimiter=PROMPTS["DEFAULT_TUPLE_DELIMITER"],
 
267
  completion_delimiter=PROMPTS["DEFAULT_COMPLETION_DELIMITER"],
268
  entity_types=",".join(PROMPTS["DEFAULT_ENTITY_TYPES"]),
269
  examples=examples,
270
+ language=language,
271
+ )
272
+
273
  continue_prompt = PROMPTS["entiti_continue_extraction"]
274
  if_loop_prompt = PROMPTS["entiti_if_loop_extraction"]
275
 
 
401
 
402
  return knowledge_graph_inst
403
 
404
+
405
  async def kg_query(
406
  query,
407
  knowledge_graph_inst: BaseGraphStorage,
 
414
  context = None
415
  example_number = global_config["addon_params"].get("example_number", None)
416
  if example_number and example_number < len(PROMPTS["keywords_extraction_examples"]):
417
+ examples = "\n".join(
418
+ PROMPTS["keywords_extraction_examples"][: int(example_number)]
419
+ )
420
  else:
421
+ examples = "\n".join(PROMPTS["keywords_extraction_examples"])
422
+
423
  # Set mode
424
  if query_param.mode not in ["local", "global", "hybrid"]:
425
  logger.error(f"Unknown mode {query_param.mode} in kg_query")
426
  return PROMPTS["fail_response"]
427
+
428
  # LLM generate keywords
429
  use_model_func = global_config["llm_model_func"]
430
  kw_prompt_temp = PROMPTS["keywords_extraction"]
431
+ kw_prompt = kw_prompt_temp.format(query=query, examples=examples)
432
+ result = await use_model_func(kw_prompt)
433
+ logger.info("kw_prompt result:")
434
  print(result)
435
  try:
436
  json_text = locate_json_string_body_from_string(result)
437
  keywords_data = json.loads(json_text)
438
  hl_keywords = keywords_data.get("high_level_keywords", [])
439
  ll_keywords = keywords_data.get("low_level_keywords", [])
440
+
441
  # Handle parsing error
442
  except json.JSONDecodeError as e:
443
  print(f"JSON parsing error: {e} {result}")
444
  return PROMPTS["fail_response"]
445
+
446
  # Handdle keywords missing
447
  if hl_keywords == [] and ll_keywords == []:
448
  logger.warning("low_level_keywords and high_level_keywords is empty")
449
+ return PROMPTS["fail_response"]
450
+ if ll_keywords == [] and query_param.mode in ["local", "hybrid"]:
451
  logger.warning("low_level_keywords is empty")
452
  return PROMPTS["fail_response"]
453
  else:
454
  ll_keywords = ", ".join(ll_keywords)
455
+ if hl_keywords == [] and query_param.mode in ["global", "hybrid"]:
456
  logger.warning("high_level_keywords is empty")
457
  return PROMPTS["fail_response"]
458
  else:
459
  hl_keywords = ", ".join(hl_keywords)
460
+
461
  # Build context
462
+ keywords = [ll_keywords, hl_keywords]
463
  context = await _build_query_context(
464
+ keywords,
465
+ knowledge_graph_inst,
466
+ entities_vdb,
467
+ relationships_vdb,
468
+ text_chunks_db,
469
+ query_param,
470
+ )
471
+
472
  if query_param.only_need_context:
473
  return context
474
  if context is None:
 
476
  sys_prompt_temp = PROMPTS["rag_response"]
477
  sys_prompt = sys_prompt_temp.format(
478
  context_data=context, response_type=query_param.response_type
479
+ )
480
  if query_param.only_need_prompt:
481
  return sys_prompt
482
  response = await use_model_func(
483
  query,
484
  system_prompt=sys_prompt,
485
+ )
486
  if len(response) > len(sys_prompt):
487
  response = (
488
  response.replace(sys_prompt, "")
 
504
  relationships_vdb: BaseVectorStorage,
505
  text_chunks_db: BaseKVStorage[TextChunkSchema],
506
  query_param: QueryParam,
507
+ ):
508
  ll_kewwords, hl_keywrds = query[0], query[1]
509
  if query_param.mode in ["local", "hybrid"]:
510
  if ll_kewwords == "":
511
+ ll_entities_context, ll_relations_context, ll_text_units_context = (
512
+ "",
513
+ "",
514
+ "",
515
+ )
516
+ warnings.warn(
517
+ "Low Level context is None. Return empty Low entity/relationship/source"
518
+ )
519
  query_param.mode = "global"
520
  else:
521
+ (
522
+ ll_entities_context,
523
+ ll_relations_context,
524
+ ll_text_units_context,
525
+ ) = await _get_node_data(
526
  ll_kewwords,
527
  knowledge_graph_inst,
528
  entities_vdb,
529
  text_chunks_db,
530
+ query_param,
531
+ )
532
  if query_param.mode in ["global", "hybrid"]:
533
  if hl_keywrds == "":
534
+ hl_entities_context, hl_relations_context, hl_text_units_context = (
535
+ "",
536
+ "",
537
+ "",
538
+ )
539
+ warnings.warn(
540
+ "High Level context is None. Return empty High entity/relationship/source"
541
+ )
542
  query_param.mode = "local"
543
  else:
544
+ (
545
+ hl_entities_context,
546
+ hl_relations_context,
547
+ hl_text_units_context,
548
+ ) = await _get_edge_data(
549
  hl_keywrds,
550
  knowledge_graph_inst,
551
  relationships_vdb,
552
  text_chunks_db,
553
+ query_param,
554
+ )
555
+ if query_param.mode == "hybrid":
556
+ entities_context, relations_context, text_units_context = combine_contexts(
557
+ [hl_entities_context, ll_entities_context],
558
+ [hl_relations_context, ll_relations_context],
559
+ [hl_text_units_context, ll_text_units_context],
560
+ )
561
+ elif query_param.mode == "local":
562
+ entities_context, relations_context, text_units_context = (
563
+ ll_entities_context,
564
+ ll_relations_context,
565
+ ll_text_units_context,
566
+ )
567
+ elif query_param.mode == "global":
568
+ entities_context, relations_context, text_units_context = (
569
+ hl_entities_context,
570
+ hl_relations_context,
571
+ hl_text_units_context,
572
+ )
573
  return f"""
574
  # -----Entities-----
575
  # ```csv
 
586
  # """
587
 
588
 
 
589
  async def _get_node_data(
590
  query,
591
  knowledge_graph_inst: BaseGraphStorage,
 
603
  )
604
  if not all([n is not None for n in node_datas]):
605
  logger.warning("Some nodes are missing, maybe the storage is damaged")
606
+
607
  # 获取实体的度
608
  node_degrees = await asyncio.gather(
609
  *[knowledge_graph_inst.node_degree(r["entity_name"]) for r in results]
 
623
  )
624
  logger.info(
625
  f"Local query uses {len(node_datas)} entites, {len(use_relations)} relations, {len(use_text_units)} text units"
626
+ )
627
 
628
  # 构建提示词
629
  entites_section_list = [["id", "entity", "type", "description", "rank"]]
 
660
  for i, t in enumerate(use_text_units):
661
  text_units_section_list.append([i, t["content"]])
662
  text_units_context = list_of_list_to_csv(text_units_section_list)
663
+ return entities_context, relations_context, text_units_context
664
 
665
 
666
  async def _find_most_related_text_unit_from_entities(
 
856
  for i, t in enumerate(use_text_units):
857
  text_units_section_list.append([i, t["content"]])
858
  text_units_context = list_of_list_to_csv(text_units_section_list)
859
+ return entities_context, relations_context, text_units_context
 
860
 
861
 
862
  async def _find_most_related_entities_from_relationships(
 
936
  def combine_contexts(entities, relationships, sources):
937
  # Function to extract entities, relationships, and sources from context strings
938
  hl_entities, ll_entities = entities[0], entities[1]
939
+ hl_relationships, ll_relationships = relationships[0], relationships[1]
940
  hl_sources, ll_sources = sources[0], sources[1]
941
  # Combine and deduplicate the entities
942
  combined_entities = process_combine_contexts(hl_entities, ll_entities)
lightrag/prompt.py CHANGED
@@ -52,7 +52,7 @@ Output:
52
  """
53
 
54
  PROMPTS["entity_extraction_examples"] = [
55
- """Example 1:
56
 
57
  Entity_types: [person, technology, mission, organization, location]
58
  Text:
@@ -77,7 +77,7 @@ Output:
77
  ("relationship"{tuple_delimiter}"Taylor"{tuple_delimiter}"The Device"{tuple_delimiter}"Taylor shows reverence towards the device, indicating its importance and potential impact."{tuple_delimiter}"reverence, technological significance"{tuple_delimiter}9){record_delimiter}
78
  ("content_keywords"{tuple_delimiter}"power dynamics, ideological conflict, discovery, rebellion"){completion_delimiter}
79
  #############################""",
80
- """Example 2:
81
 
82
  Entity_types: [person, technology, mission, organization, location]
83
  Text:
@@ -95,7 +95,7 @@ Output:
95
  ("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"The team is directly involved in Operation: Dulce, executing its evolved objectives and activities."{tuple_delimiter}"mission evolution, active participation"{tuple_delimiter}9){completion_delimiter}
96
  ("content_keywords"{tuple_delimiter}"mission evolution, decision-making, active participation, cosmic significance"){completion_delimiter}
97
  #############################""",
98
- """Example 3:
99
 
100
  Entity_types: [person, role, technology, organization, event, location, concept]
101
  Text:
@@ -121,10 +121,12 @@ Output:
121
  ("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"Alex and his team are the key figures in Humanity's Response to the unknown intelligence."{tuple_delimiter}"collective action, cosmic significance"{tuple_delimiter}8){record_delimiter}
122
  ("relationship"{tuple_delimiter}"Control"{tuple_delimiter}"Intelligence"{tuple_delimiter}"The concept of Control is challenged by the Intelligence that writes its own rules."{tuple_delimiter}"power dynamics, autonomy"{tuple_delimiter}7){record_delimiter}
123
  ("content_keywords"{tuple_delimiter}"first contact, control, communication, cosmic significance"){completion_delimiter}
124
- #############################"""
125
  ]
126
 
127
- PROMPTS["summarize_entity_descriptions"] = """You are a helpful assistant responsible for generating a comprehensive summary of the data provided below.
 
 
128
  Given one or two entities, and a list of descriptions, all related to the same entity or group of entities.
129
  Please concatenate all of these into a single, comprehensive description. Make sure to include information collected from all the descriptions.
130
  If the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary.
@@ -139,10 +141,14 @@ Description List: {description_list}
139
  Output:
140
  """
141
 
142
- PROMPTS["entiti_continue_extraction"] = """MANY entities were missed in the last extraction. Add them below using the same format:
 
 
143
  """
144
 
145
- PROMPTS["entiti_if_loop_extraction"] = """It appears some entities may have still been missed. Answer YES | NO if there are still entities that need to be added.
 
 
146
  """
147
 
148
  PROMPTS["fail_response"] = "Sorry, I'm not able to provide an answer to that question."
@@ -201,7 +207,7 @@ Output:
201
  """
202
 
203
  PROMPTS["keywords_extraction_examples"] = [
204
- """Example 1:
205
 
206
  Query: "How does international trade influence global economic stability?"
207
  ################
@@ -211,7 +217,7 @@ Output:
211
  "low_level_keywords": ["Trade agreements", "Tariffs", "Currency exchange", "Imports", "Exports"]
212
  }}
213
  #############################""",
214
- """Example 2:
215
 
216
  Query: "What are the environmental consequences of deforestation on biodiversity?"
217
  ################
@@ -220,8 +226,8 @@ Output:
220
  "high_level_keywords": ["Environmental consequences", "Deforestation", "Biodiversity loss"],
221
  "low_level_keywords": ["Species extinction", "Habitat destruction", "Carbon emissions", "Rainforest", "Ecosystem"]
222
  }}
223
- #############################""",
224
- """Example 3:
225
 
226
  Query: "What is the role of education in reducing poverty?"
227
  ################
@@ -230,8 +236,8 @@ Output:
230
  "high_level_keywords": ["Education", "Poverty reduction", "Socioeconomic development"],
231
  "low_level_keywords": ["School access", "Literacy rates", "Job training", "Income inequality"]
232
  }}
233
- #############################"""
234
- ]
235
 
236
 
237
  PROMPTS["naive_rag_response"] = """---Role---
 
52
  """
53
 
54
  PROMPTS["entity_extraction_examples"] = [
55
+ """Example 1:
56
 
57
  Entity_types: [person, technology, mission, organization, location]
58
  Text:
 
77
  ("relationship"{tuple_delimiter}"Taylor"{tuple_delimiter}"The Device"{tuple_delimiter}"Taylor shows reverence towards the device, indicating its importance and potential impact."{tuple_delimiter}"reverence, technological significance"{tuple_delimiter}9){record_delimiter}
78
  ("content_keywords"{tuple_delimiter}"power dynamics, ideological conflict, discovery, rebellion"){completion_delimiter}
79
  #############################""",
80
+ """Example 2:
81
 
82
  Entity_types: [person, technology, mission, organization, location]
83
  Text:
 
95
  ("relationship"{tuple_delimiter}"The team"{tuple_delimiter}"Operation: Dulce"{tuple_delimiter}"The team is directly involved in Operation: Dulce, executing its evolved objectives and activities."{tuple_delimiter}"mission evolution, active participation"{tuple_delimiter}9){completion_delimiter}
96
  ("content_keywords"{tuple_delimiter}"mission evolution, decision-making, active participation, cosmic significance"){completion_delimiter}
97
  #############################""",
98
+ """Example 3:
99
 
100
  Entity_types: [person, role, technology, organization, event, location, concept]
101
  Text:
 
121
  ("relationship"{tuple_delimiter}"Alex"{tuple_delimiter}"Humanity's Response"{tuple_delimiter}"Alex and his team are the key figures in Humanity's Response to the unknown intelligence."{tuple_delimiter}"collective action, cosmic significance"{tuple_delimiter}8){record_delimiter}
122
  ("relationship"{tuple_delimiter}"Control"{tuple_delimiter}"Intelligence"{tuple_delimiter}"The concept of Control is challenged by the Intelligence that writes its own rules."{tuple_delimiter}"power dynamics, autonomy"{tuple_delimiter}7){record_delimiter}
123
  ("content_keywords"{tuple_delimiter}"first contact, control, communication, cosmic significance"){completion_delimiter}
124
+ #############################""",
125
  ]
126
 
127
+ PROMPTS[
128
+ "summarize_entity_descriptions"
129
+ ] = """You are a helpful assistant responsible for generating a comprehensive summary of the data provided below.
130
  Given one or two entities, and a list of descriptions, all related to the same entity or group of entities.
131
  Please concatenate all of these into a single, comprehensive description. Make sure to include information collected from all the descriptions.
132
  If the provided descriptions are contradictory, please resolve the contradictions and provide a single, coherent summary.
 
141
  Output:
142
  """
143
 
144
+ PROMPTS[
145
+ "entiti_continue_extraction"
146
+ ] = """MANY entities were missed in the last extraction. Add them below using the same format:
147
  """
148
 
149
+ PROMPTS[
150
+ "entiti_if_loop_extraction"
151
+ ] = """It appears some entities may have still been missed. Answer YES | NO if there are still entities that need to be added.
152
  """
153
 
154
  PROMPTS["fail_response"] = "Sorry, I'm not able to provide an answer to that question."
 
207
  """
208
 
209
  PROMPTS["keywords_extraction_examples"] = [
210
+ """Example 1:
211
 
212
  Query: "How does international trade influence global economic stability?"
213
  ################
 
217
  "low_level_keywords": ["Trade agreements", "Tariffs", "Currency exchange", "Imports", "Exports"]
218
  }}
219
  #############################""",
220
+ """Example 2:
221
 
222
  Query: "What are the environmental consequences of deforestation on biodiversity?"
223
  ################
 
226
  "high_level_keywords": ["Environmental consequences", "Deforestation", "Biodiversity loss"],
227
  "low_level_keywords": ["Species extinction", "Habitat destruction", "Carbon emissions", "Rainforest", "Ecosystem"]
228
  }}
229
+ #############################""",
230
+ """Example 3:
231
 
232
  Query: "What is the role of education in reducing poverty?"
233
  ################
 
236
  "high_level_keywords": ["Education", "Poverty reduction", "Socioeconomic development"],
237
  "low_level_keywords": ["School access", "Literacy rates", "Job training", "Income inequality"]
238
  }}
239
+ #############################""",
240
+ ]
241
 
242
 
243
  PROMPTS["naive_rag_response"] = """---Role---
lightrag/utils.py CHANGED
@@ -56,7 +56,8 @@ def locate_json_string_body_from_string(content: str) -> Union[str, None]:
56
  maybe_json_str = maybe_json_str.replace("'", '"')
57
  json.loads(maybe_json_str)
58
  return maybe_json_str
59
- except:
 
60
  # try:
61
  # content = (
62
  # content.replace(kw_prompt[:-1], "")
@@ -64,9 +65,9 @@ def locate_json_string_body_from_string(content: str) -> Union[str, None]:
64
  # .replace("model", "")
65
  # .strip()
66
  # )
67
- # maybe_json_str = "{" + content.split("{")[1].split("}")[0] + "}"
68
  # json.loads(maybe_json_str)
69
-
70
  return None
71
 
72
 
 
56
  maybe_json_str = maybe_json_str.replace("'", '"')
57
  json.loads(maybe_json_str)
58
  return maybe_json_str
59
+ except Exception:
60
+ pass
61
  # try:
62
  # content = (
63
  # content.replace(kw_prompt[:-1], "")
 
65
  # .replace("model", "")
66
  # .strip()
67
  # )
68
+ # maybe_json_str = "{" + content.split("{")[1].split("}")[0] + "}"
69
  # json.loads(maybe_json_str)
70
+
71
  return None
72
 
73