LarFii committed on
Commit
098ff27
·
1 Parent(s): f5099cd
Files changed (1) hide show
  1. lightrag/operate.py +10 -36
lightrag/operate.py CHANGED
@@ -172,7 +172,7 @@ async def _handle_single_entity_extraction(
172
  entity_type=entity_type,
173
  description=entity_description,
174
  source_id=chunk_key,
175
- metadata={"created_at": time.time(), "file_path": file_path},
176
  )
177
 
178
 
@@ -201,7 +201,7 @@ async def _handle_single_relationship_extraction(
201
  description=edge_description,
202
  keywords=edge_keywords,
203
  source_id=edge_source_id,
204
- metadata={"created_at": time.time(), "file_path": file_path},
205
  )
206
 
207
 
@@ -224,9 +224,7 @@ async def _merge_nodes_then_upsert(
224
  split_string_by_multi_markers(already_node["source_id"], [GRAPH_FIELD_SEP])
225
  )
226
  already_file_paths.extend(
227
- split_string_by_multi_markers(
228
- already_node["metadata"]["file_path"], [GRAPH_FIELD_SEP]
229
- )
230
  )
231
  already_description.append(already_node["description"])
232
 
@@ -244,7 +242,7 @@ async def _merge_nodes_then_upsert(
244
  set([dp["source_id"] for dp in nodes_data] + already_source_ids)
245
  )
246
  file_path = GRAPH_FIELD_SEP.join(
247
- set([dp["metadata"]["file_path"] for dp in nodes_data] + already_file_paths)
248
  )
249
 
250
  logger.debug(f"file_path: {file_path}")
@@ -298,7 +296,7 @@ async def _merge_edges_then_upsert(
298
  if already_edge.get("file_path") is not None:
299
  already_file_paths.extend(
300
  split_string_by_multi_markers(
301
- already_edge["metadata"]["file_path"], [GRAPH_FIELD_SEP]
302
  )
303
  )
304
 
@@ -340,11 +338,7 @@ async def _merge_edges_then_upsert(
340
  )
341
  file_path = GRAPH_FIELD_SEP.join(
342
  set(
343
- [
344
- dp["metadata"]["file_path"]
345
- for dp in edges_data
346
- if dp.get("metadata", {}).get("file_path")
347
- ]
348
  + already_file_paths
349
  )
350
  )
@@ -679,10 +673,6 @@ async def extract_entities(
679
  "content": f"{dp['entity_name']}\n{dp['description']}",
680
  "source_id": dp["source_id"],
681
  "file_path": dp.get("file_path", "unknown_source"),
682
- "metadata": {
683
- "created_at": dp.get("created_at", time.time()),
684
- "file_path": dp.get("file_path", "unknown_source"),
685
- },
686
  }
687
  for dp in all_entities_data
688
  }
@@ -697,10 +687,6 @@ async def extract_entities(
697
  "content": f"{dp['src_id']}\t{dp['tgt_id']}\n{dp['keywords']}\n{dp['description']}",
698
  "source_id": dp["source_id"],
699
  "file_path": dp.get("file_path", "unknown_source"),
700
- "metadata": {
701
- "created_at": dp.get("created_at", time.time()),
702
- "file_path": dp.get("file_path", "unknown_source"),
703
- },
704
  }
705
  for dp in all_relationships_data
706
  }
@@ -1285,11 +1271,8 @@ async def _get_node_data(
1285
  if isinstance(created_at, (int, float)):
1286
  created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
1287
 
1288
- # Get file path from metadata or directly from node data
1289
  file_path = n.get("file_path", "unknown_source")
1290
- if not file_path or file_path == "unknown_source":
1291
- # Try to get from metadata
1292
- file_path = n.get("metadata", {}).get("file_path", "unknown_source")
1293
 
1294
  entites_section_list.append(
1295
  [
@@ -1323,11 +1306,8 @@ async def _get_node_data(
1323
  if isinstance(created_at, (int, float)):
1324
  created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
1325
 
1326
- # Get file path from metadata or directly from edge data
1327
  file_path = e.get("file_path", "unknown_source")
1328
- if not file_path or file_path == "unknown_source":
1329
- # Try to get from metadata
1330
- file_path = e.get("metadata", {}).get("file_path", "unknown_source")
1331
 
1332
  relations_section_list.append(
1333
  [
@@ -1564,11 +1544,8 @@ async def _get_edge_data(
1564
  if isinstance(created_at, (int, float)):
1565
  created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
1566
 
1567
- # Get file path from metadata or directly from edge data
1568
  file_path = e.get("file_path", "unknown_source")
1569
- if not file_path or file_path == "unknown_source":
1570
- # Try to get from metadata
1571
- file_path = e.get("metadata", {}).get("file_path", "unknown_source")
1572
 
1573
  relations_section_list.append(
1574
  [
@@ -1594,11 +1571,8 @@ async def _get_edge_data(
1594
  if isinstance(created_at, (int, float)):
1595
  created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
1596
 
1597
- # Get file path from metadata or directly from node data
1598
  file_path = n.get("file_path", "unknown_source")
1599
- if not file_path or file_path == "unknown_source":
1600
- # Try to get from metadata
1601
- file_path = n.get("metadata", {}).get("file_path", "unknown_source")
1602
 
1603
  entites_section_list.append(
1604
  [
 
172
  entity_type=entity_type,
173
  description=entity_description,
174
  source_id=chunk_key,
175
+ file_path=file_path,
176
  )
177
 
178
 
 
201
  description=edge_description,
202
  keywords=edge_keywords,
203
  source_id=edge_source_id,
204
+ file_path=file_path,
205
  )
206
 
207
 
 
224
  split_string_by_multi_markers(already_node["source_id"], [GRAPH_FIELD_SEP])
225
  )
226
  already_file_paths.extend(
227
+ split_string_by_multi_markers(already_node["file_path"], [GRAPH_FIELD_SEP])
 
 
228
  )
229
  already_description.append(already_node["description"])
230
 
 
242
  set([dp["source_id"] for dp in nodes_data] + already_source_ids)
243
  )
244
  file_path = GRAPH_FIELD_SEP.join(
245
+ set([dp["file_path"] for dp in nodes_data] + already_file_paths)
246
  )
247
 
248
  logger.debug(f"file_path: {file_path}")
 
296
  if already_edge.get("file_path") is not None:
297
  already_file_paths.extend(
298
  split_string_by_multi_markers(
299
+ already_edge["file_path"], [GRAPH_FIELD_SEP]
300
  )
301
  )
302
 
 
338
  )
339
  file_path = GRAPH_FIELD_SEP.join(
340
  set(
341
+ [dp["file_path"] for dp in edges_data if dp.get("file_path")]
 
 
 
 
342
  + already_file_paths
343
  )
344
  )
 
673
  "content": f"{dp['entity_name']}\n{dp['description']}",
674
  "source_id": dp["source_id"],
675
  "file_path": dp.get("file_path", "unknown_source"),
 
 
 
 
676
  }
677
  for dp in all_entities_data
678
  }
 
687
  "content": f"{dp['src_id']}\t{dp['tgt_id']}\n{dp['keywords']}\n{dp['description']}",
688
  "source_id": dp["source_id"],
689
  "file_path": dp.get("file_path", "unknown_source"),
 
 
 
 
690
  }
691
  for dp in all_relationships_data
692
  }
 
1271
  if isinstance(created_at, (int, float)):
1272
  created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
1273
 
1274
+ # Get file path from node data
1275
  file_path = n.get("file_path", "unknown_source")
 
 
 
1276
 
1277
  entites_section_list.append(
1278
  [
 
1306
  if isinstance(created_at, (int, float)):
1307
  created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
1308
 
1309
+ # Get file path from edge data
1310
  file_path = e.get("file_path", "unknown_source")
 
 
 
1311
 
1312
  relations_section_list.append(
1313
  [
 
1544
  if isinstance(created_at, (int, float)):
1545
  created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
1546
 
1547
+ # Get file path from edge data
1548
  file_path = e.get("file_path", "unknown_source")
 
 
 
1549
 
1550
  relations_section_list.append(
1551
  [
 
1571
  if isinstance(created_at, (int, float)):
1572
  created_at = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(created_at))
1573
 
1574
+ # Get file path from node data
1575
  file_path = n.get("file_path", "unknown_source")
 
 
 
1576
 
1577
  entites_section_list.append(
1578
  [