gzdaniel committed on
Commit
ead12ac
·
1 Parent(s): 5c8c3d9

Remove list_of_list_to_dict function

Browse files
Files changed (2) hide show
  1. lightrag/operate.py +71 -111
  2. lightrag/utils.py +0 -38
lightrag/operate.py CHANGED
@@ -25,7 +25,6 @@ from .utils import (
25
  CacheData,
26
  get_conversation_turns,
27
  use_llm_func_with_cache,
28
- list_of_list_to_dict,
29
  )
30
  from .base import (
31
  BaseGraphStorage,
@@ -1175,22 +1174,17 @@ async def _get_vector_context(
1175
  entities_context = []
1176
  relations_context = []
1177
 
1178
- # Create text_units_context in the same format as _get_edge_data and _get_node_data
1179
- text_units_section_list = [["id", "content", "file_path"]]
1180
-
1181
  for i, chunk in enumerate(maybe_trun_chunks):
1182
- # Add to text_units_section_list
1183
- text_units_section_list.append(
1184
- [
1185
- i + 1, # id
1186
- chunk["content"], # content
1187
- chunk["file_path"], # file_path
1188
- ]
1189
  )
1190
 
1191
- # Convert to dictionary format using list_of_list_to_dict
1192
- text_units_context = list_of_list_to_dict(text_units_section_list)
1193
-
1194
  return entities_context, relations_context, text_units_context
1195
  except Exception as e:
1196
  logger.error(f"Error in _get_vector_context: {e}")
@@ -1398,17 +1392,7 @@ async def _get_node_data(
1398
  )
1399
 
1400
  # build prompt
1401
- entites_section_list = [
1402
- [
1403
- "id",
1404
- "entity",
1405
- "type",
1406
- "description",
1407
- "rank",
1408
- "created_at",
1409
- "file_path",
1410
- ]
1411
- ]
1412
  for i, n in enumerate(node_datas):
1413
  created_at = n.get("created_at", "UNKNOWN")
1414
  if isinstance(created_at, (int, float)):
@@ -1417,32 +1401,19 @@ async def _get_node_data(
1417
  # Get file path from node data
1418
  file_path = n.get("file_path", "unknown_source")
1419
 
1420
- entites_section_list.append(
1421
- [
1422
- i + 1,
1423
- n["entity_name"],
1424
- n.get("entity_type", "UNKNOWN"),
1425
- n.get("description", "UNKNOWN"),
1426
- n["rank"],
1427
- created_at,
1428
- file_path,
1429
- ]
1430
  )
1431
- entities_context = list_of_list_to_dict(entites_section_list)
1432
-
1433
- relations_section_list = [
1434
- [
1435
- "id",
1436
- "entity1",
1437
- "entity2",
1438
- "description",
1439
- "keywords",
1440
- "weight",
1441
- "rank",
1442
- "created_at",
1443
- "file_path",
1444
- ]
1445
- ]
1446
  for i, e in enumerate(use_relations):
1447
  created_at = e.get("created_at", "UNKNOWN")
1448
  # Convert timestamp to readable format
@@ -1452,27 +1423,29 @@ async def _get_node_data(
1452
  # Get file path from edge data
1453
  file_path = e.get("file_path", "unknown_source")
1454
 
1455
- relations_section_list.append(
1456
- [
1457
- i + 1,
1458
- e["src_tgt"][0],
1459
- e["src_tgt"][1],
1460
- e["description"],
1461
- e["keywords"],
1462
- e["weight"],
1463
- e["rank"],
1464
- created_at,
1465
- file_path,
1466
- ]
1467
  )
1468
- relations_context = list_of_list_to_dict(relations_section_list)
1469
 
1470
- text_units_section_list = [["id", "content", "file_path"]]
1471
  for i, t in enumerate(use_text_units):
1472
- text_units_section_list.append(
1473
- [i + 1, t["content"], t.get("file_path", "unknown_source")]
 
 
 
 
1474
  )
1475
- text_units_context = list_of_list_to_dict(text_units_section_list)
1476
  return entities_context, relations_context, text_units_context
1477
 
1478
 
@@ -1715,19 +1688,7 @@ async def _get_edge_data(
1715
  f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
1716
  )
1717
 
1718
- relations_section_list = [
1719
- [
1720
- "id",
1721
- "entity1",
1722
- "entity2",
1723
- "description",
1724
- "keywords",
1725
- "weight",
1726
- "rank",
1727
- "created_at",
1728
- "file_path",
1729
- ]
1730
- ]
1731
  for i, e in enumerate(edge_datas):
1732
  created_at = e.get("created_at", "UNKNOWN")
1733
  # Convert timestamp to readable format
@@ -1737,24 +1698,21 @@ async def _get_edge_data(
1737
  # Get file path from edge data
1738
  file_path = e.get("file_path", "unknown_source")
1739
 
1740
- relations_section_list.append(
1741
- [
1742
- i + 1,
1743
- e["src_id"],
1744
- e["tgt_id"],
1745
- e["description"],
1746
- e["keywords"],
1747
- e["weight"],
1748
- e["rank"],
1749
- created_at,
1750
- file_path,
1751
- ]
1752
  )
1753
- relations_context = list_of_list_to_dict(relations_section_list)
1754
 
1755
- entites_section_list = [
1756
- ["id", "entity", "type", "description", "rank", "created_at", "file_path"]
1757
- ]
1758
  for i, n in enumerate(use_entities):
1759
  created_at = n.get("created_at", "UNKNOWN")
1760
  # Convert timestamp to readable format
@@ -1764,25 +1722,27 @@ async def _get_edge_data(
1764
  # Get file path from node data
1765
  file_path = n.get("file_path", "unknown_source")
1766
 
1767
- entites_section_list.append(
1768
- [
1769
- i + 1,
1770
- n["entity_name"],
1771
- n.get("entity_type", "UNKNOWN"),
1772
- n.get("description", "UNKNOWN"),
1773
- n["rank"],
1774
- created_at,
1775
- file_path,
1776
- ]
1777
  )
1778
- entities_context = list_of_list_to_dict(entites_section_list)
1779
 
1780
- text_units_section_list = [["id", "content", "file_path"]]
1781
  for i, t in enumerate(use_text_units):
1782
- text_units_section_list.append(
1783
- [i + 1, t["content"], t.get("file_path", "unknown")]
 
 
 
 
1784
  )
1785
- text_units_context = list_of_list_to_dict(text_units_section_list)
1786
  return entities_context, relations_context, text_units_context
1787
 
1788
 
 
25
  CacheData,
26
  get_conversation_turns,
27
  use_llm_func_with_cache,
 
28
  )
29
  from .base import (
30
  BaseGraphStorage,
 
1174
  entities_context = []
1175
  relations_context = []
1176
 
1177
+ # Create text_units_context directly as a list of dictionaries
1178
+ text_units_context = []
 
1179
  for i, chunk in enumerate(maybe_trun_chunks):
1180
+ text_units_context.append(
1181
+ {
1182
+ "id": i + 1,
1183
+ "content": chunk["content"],
1184
+ "file_path": chunk["file_path"],
1185
+ }
 
1186
  )
1187
 
 
 
 
1188
  return entities_context, relations_context, text_units_context
1189
  except Exception as e:
1190
  logger.error(f"Error in _get_vector_context: {e}")
 
1392
  )
1393
 
1394
  # build prompt
1395
+ entities_context = []
 
 
 
 
 
 
 
 
 
 
1396
  for i, n in enumerate(node_datas):
1397
  created_at = n.get("created_at", "UNKNOWN")
1398
  if isinstance(created_at, (int, float)):
 
1401
  # Get file path from node data
1402
  file_path = n.get("file_path", "unknown_source")
1403
 
1404
+ entities_context.append(
1405
+ {
1406
+ "id": i + 1,
1407
+ "entity": n["entity_name"],
1408
+ "type": n.get("entity_type", "UNKNOWN"),
1409
+ "description": n.get("description", "UNKNOWN"),
1410
+ "rank": n["rank"],
1411
+ "created_at": created_at,
1412
+ "file_path": file_path,
1413
+ }
1414
  )
1415
+
1416
+ relations_context = []
 
 
 
 
 
 
 
 
 
 
 
 
 
1417
  for i, e in enumerate(use_relations):
1418
  created_at = e.get("created_at", "UNKNOWN")
1419
  # Convert timestamp to readable format
 
1423
  # Get file path from edge data
1424
  file_path = e.get("file_path", "unknown_source")
1425
 
1426
+ relations_context.append(
1427
+ {
1428
+ "id": i + 1,
1429
+ "entity1": e["src_tgt"][0],
1430
+ "entity2": e["src_tgt"][1],
1431
+ "description": e["description"],
1432
+ "keywords": e["keywords"],
1433
+ "weight": e["weight"],
1434
+ "rank": e["rank"],
1435
+ "created_at": created_at,
1436
+ "file_path": file_path,
1437
+ }
1438
  )
 
1439
 
1440
+ text_units_context = []
1441
  for i, t in enumerate(use_text_units):
1442
+ text_units_context.append(
1443
+ {
1444
+ "id": i + 1,
1445
+ "content": t["content"],
1446
+ "file_path": t.get("file_path", "unknown_source"),
1447
+ }
1448
  )
 
1449
  return entities_context, relations_context, text_units_context
1450
 
1451
 
 
1688
  f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
1689
  )
1690
 
1691
+ relations_context = []
 
 
 
 
 
 
 
 
 
 
 
 
1692
  for i, e in enumerate(edge_datas):
1693
  created_at = e.get("created_at", "UNKNOWN")
1694
  # Convert timestamp to readable format
 
1698
  # Get file path from edge data
1699
  file_path = e.get("file_path", "unknown_source")
1700
 
1701
+ relations_context.append(
1702
+ {
1703
+ "id": i + 1,
1704
+ "entity1": e["src_id"],
1705
+ "entity2": e["tgt_id"],
1706
+ "description": e["description"],
1707
+ "keywords": e["keywords"],
1708
+ "weight": e["weight"],
1709
+ "rank": e["rank"],
1710
+ "created_at": created_at,
1711
+ "file_path": file_path,
1712
+ }
1713
  )
 
1714
 
1715
+ entities_context = []
 
 
1716
  for i, n in enumerate(use_entities):
1717
  created_at = n.get("created_at", "UNKNOWN")
1718
  # Convert timestamp to readable format
 
1722
  # Get file path from node data
1723
  file_path = n.get("file_path", "unknown_source")
1724
 
1725
+ entities_context.append(
1726
+ {
1727
+ "id": i + 1,
1728
+ "entity": n["entity_name"],
1729
+ "type": n.get("entity_type", "UNKNOWN"),
1730
+ "description": n.get("description", "UNKNOWN"),
1731
+ "rank": n["rank"],
1732
+ "created_at": created_at,
1733
+ "file_path": file_path,
1734
+ }
1735
  )
 
1736
 
1737
+ text_units_context = []
1738
  for i, t in enumerate(use_text_units):
1739
+ text_units_context.append(
1740
+ {
1741
+ "id": i + 1,
1742
+ "content": t["content"],
1743
+ "file_path": t.get("file_path", "unknown"),
1744
+ }
1745
  )
 
1746
  return entities_context, relations_context, text_units_context
1747
 
1748
 
lightrag/utils.py CHANGED
@@ -719,44 +719,6 @@ def truncate_list_by_token_size(
719
  return list_data
720
 
721
 
722
- def list_of_list_to_dict(data: list[list[str]]) -> list[dict[str, str]]:
723
- """Convert a 2D string list (table-like data) into a list of dictionaries.
724
-
725
- The first row is treated as header containing field names. Subsequent rows become
726
- dictionary entries where keys come from header and values from row data.
727
-
728
- Args:
729
- data: 2D string array where first row contains headers and rest are data rows.
730
- Minimum 2 columns required in data rows (rows with <2 elements are skipped).
731
-
732
- Returns:
733
- List of dictionaries where each dict represents a data row with:
734
- - Keys: Header values from first row
735
- - Values: Corresponding row values (empty string if missing)
736
-
737
- Example:
738
- Input: [["Name","Age"], ["Alice","23"], ["Bob"]]
739
- Output: [{"Name":"Alice","Age":"23"}, {"Name":"Bob","Age":""}]
740
- """
741
- if not data or len(data) <= 1:
742
- return []
743
-
744
- header = data[0]
745
- result = []
746
-
747
- for row in data[1:]:
748
- if len(row) >= 2:
749
- item = {}
750
- for i, field_name in enumerate(header):
751
- if i < len(row):
752
- item[field_name] = str(row[i])
753
- else:
754
- item[field_name] = ""
755
- result.append(item)
756
-
757
- return result
758
-
759
-
760
  def save_data_to_file(data, file_name):
761
  with open(file_name, "w", encoding="utf-8") as f:
762
  json.dump(data, f, ensure_ascii=False, indent=4)
 
719
  return list_data
720
 
721
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
722
  def save_data_to_file(data, file_name):
723
  with open(file_name, "w", encoding="utf-8") as f:
724
  json.dump(data, f, ensure_ascii=False, indent=4)