Remove list_of_list_to_dict function
lightrag/operate.py  +71 -111
lightrag/utils.py    +0 -38
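In short: each retrieval helper previously accumulated a header row plus data
rows (a list of lists) and converted the table with list_of_list_to_dict; it
now appends dictionaries directly. A minimal sketch of the two styles, using a
stand-in `chunks` iterable rather than the repository's actual variables:

    # Old style: build a table whose first row is the header, then
    # convert it into a list of dicts with the (now removed) helper.
    rows = [["id", "content", "file_path"]]
    for i, chunk in enumerate(chunks):
        rows.append([i + 1, chunk["content"], chunk["file_path"]])
    text_units_context = list_of_list_to_dict(rows)

    # New style: build the list of dicts directly; no helper needed.
    text_units_context = [
        {"id": i + 1, "content": chunk["content"], "file_path": chunk["file_path"]}
        for i, chunk in enumerate(chunks)
    ]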
lightrag/operate.py
CHANGED

@@ -25,7 +25,6 @@ from .utils import (
     CacheData,
     get_conversation_turns,
     use_llm_func_with_cache,
-    list_of_list_to_dict,
 )
 from .base import (
     BaseGraphStorage,
@@ -1175,22 +1174,17 @@ async def _get_vector_context(
         entities_context = []
         relations_context = []
 
-        # Create text_units_context
-        text_units_section_list = [["id", "content", "file_path"]]
-
+        # Create text_units_context directly as a list of dictionaries
+        text_units_context = []
         for i, chunk in enumerate(maybe_trun_chunks):
-            # Add to text units list
-            text_units_section_list.append(
-                [
-                    i + 1,
-                    chunk["content"],
-                    chunk["file_path"],
-                ]
+            text_units_context.append(
+                {
+                    "id": i + 1,
+                    "content": chunk["content"],
+                    "file_path": chunk["file_path"],
+                }
             )
 
-        # Convert to dictionary format using list_of_list_to_dict
-        text_units_context = list_of_list_to_dict(text_units_section_list)
-
         return entities_context, relations_context, text_units_context
     except Exception as e:
         logger.error(f"Error in _get_vector_context: {e}")
@@ -1398,17 +1392,7 @@ async def _get_node_data(
     )
 
     # build prompt
-    entites_section_list = [
-        [
-            "id",
-            "entity",
-            "type",
-            "description",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+    entities_context = []
     for i, n in enumerate(node_datas):
         created_at = n.get("created_at", "UNKNOWN")
         if isinstance(created_at, (int, float)):
@@ -1417,32 +1401,19 @@ async def _get_node_data(
         # Get file path from node data
         file_path = n.get("file_path", "unknown_source")
 
-        entites_section_list.append(
-            [
-                i + 1,
-                n["entity_name"],
-                n.get("entity_type", "UNKNOWN"),
-                n.get("description", "UNKNOWN"),
-                n["rank"],
-                created_at,
-                file_path,
-            ]
+        entities_context.append(
+            {
+                "id": i + 1,
+                "entity": n["entity_name"],
+                "type": n.get("entity_type", "UNKNOWN"),
+                "description": n.get("description", "UNKNOWN"),
+                "rank": n["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    entities_context = list_of_list_to_dict(entites_section_list)
-
-    relations_section_list = [
-        [
-            "id",
-            "entity1",
-            "entity2",
-            "description",
-            "keywords",
-            "weight",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+
+    relations_context = []
     for i, e in enumerate(use_relations):
         created_at = e.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1452,27 +1423,29 @@ async def _get_node_data(
         # Get file path from edge data
         file_path = e.get("file_path", "unknown_source")
 
-        relations_section_list.append(
-            [
-                i + 1,
-                e["src_tgt"][0],
-                e["src_tgt"][1],
-                e["description"],
-                e["keywords"],
-                e["weight"],
-                e["rank"],
-                created_at,
-                file_path,
-            ]
+        relations_context.append(
+            {
+                "id": i + 1,
+                "entity1": e["src_tgt"][0],
+                "entity2": e["src_tgt"][1],
+                "description": e["description"],
+                "keywords": e["keywords"],
+                "weight": e["weight"],
+                "rank": e["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    relations_context = list_of_list_to_dict(relations_section_list)
 
-    text_units_section_list = [["id", "content", "file_path"]]
+    text_units_context = []
     for i, t in enumerate(use_text_units):
-        text_units_section_list.append(
-            [i + 1, t["content"], t["file_path"]]
+        text_units_context.append(
+            {
+                "id": i + 1,
+                "content": t["content"],
+                "file_path": t.get("file_path", "unknown_source"),
+            }
         )
-    text_units_context = list_of_list_to_dict(text_units_section_list)
     return entities_context, relations_context, text_units_context
 
 
@@ -1715,19 +1688,7 @@ async def _get_edge_data(
         f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
     )
 
-    relations_section_list = [
-        [
-            "id",
-            "entity1",
-            "entity2",
-            "description",
-            "keywords",
-            "weight",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+    relations_context = []
    for i, e in enumerate(edge_datas):
         created_at = e.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1737,24 +1698,21 @@ async def _get_edge_data(
         # Get file path from edge data
         file_path = e.get("file_path", "unknown_source")
 
-        relations_section_list.append(
-            [
-                i + 1,
-                e["src_id"],
-                e["tgt_id"],
-                e["description"],
-                e["keywords"],
-                e["weight"],
-                e["rank"],
-                created_at,
-                file_path,
-            ]
+        relations_context.append(
+            {
+                "id": i + 1,
+                "entity1": e["src_id"],
+                "entity2": e["tgt_id"],
+                "description": e["description"],
+                "keywords": e["keywords"],
+                "weight": e["weight"],
+                "rank": e["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    relations_context = list_of_list_to_dict(relations_section_list)
 
-    entites_section_list = [
-        ["id", "entity", "type", "description", "rank", "created_at", "file_path"]
-    ]
+    entities_context = []
     for i, n in enumerate(use_entities):
         created_at = n.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1764,25 +1722,27 @@ async def _get_edge_data(
         # Get file path from node data
         file_path = n.get("file_path", "unknown_source")
 
-        entites_section_list.append(
-            [
-                i + 1,
-                n["entity_name"],
-                n.get("entity_type", "UNKNOWN"),
-                n.get("description", "UNKNOWN"),
-                n["rank"],
-                created_at,
-                file_path,
-            ]
+        entities_context.append(
+            {
+                "id": i + 1,
+                "entity": n["entity_name"],
+                "type": n.get("entity_type", "UNKNOWN"),
+                "description": n.get("description", "UNKNOWN"),
+                "rank": n["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    entities_context = list_of_list_to_dict(entites_section_list)
 
-    text_units_section_list = [["id", "content", "file_path"]]
+    text_units_context = []
     for i, t in enumerate(use_text_units):
-        text_units_section_list.append(
-            [i + 1, t["content"], t["file_path"]]
+        text_units_context.append(
+            {
+                "id": i + 1,
+                "content": t["content"],
+                "file_path": t.get("file_path", "unknown"),
+            }
         )
-    text_units_context = list_of_list_to_dict(text_units_section_list)
     return entities_context, relations_context, text_units_context
 
 
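One behavioral difference worth noting: the removed helper coerced every cell
with str() (see its body in the utils.py diff below), so values such as "id",
"weight", and "rank" came back as strings, while the direct construction keeps
their native types. A small illustration with a hypothetical node record:

    # Hypothetical record, for illustration only.
    n = {"entity_name": "Alice", "entity_type": "person", "rank": 7}

    # Old pipeline (via list_of_list_to_dict): {"id": "1", ..., "rank": "7"}
    # New construction: native types are preserved.
    entry = {"id": 1, "entity": n["entity_name"], "rank": n["rank"]}
    assert isinstance(entry["id"], int) and isinstance(entry["rank"], int)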
lightrag/utils.py
CHANGED

@@ -719,44 +719,6 @@ def truncate_list_by_token_size(
     return list_data
 
 
-def list_of_list_to_dict(data: list[list[str]]) -> list[dict[str, str]]:
-    """Convert a 2D string list (table-like data) into a list of dictionaries.
-
-    The first row is treated as header containing field names. Subsequent rows become
-    dictionary entries where keys come from header and values from row data.
-
-    Args:
-        data: 2D string array where first row contains headers and rest are data rows.
-              Minimum 2 columns required in data rows (rows with <2 elements are skipped).
-
-    Returns:
-        List of dictionaries where each dict represents a data row with:
-        - Keys: Header values from first row
-        - Values: Corresponding row values (empty string if missing)
-
-    Example:
-        Input:  [["Name","Age"], ["Alice","23"], ["Bob"]]
-        Output: [{"Name":"Alice","Age":"23"}, {"Name":"Bob","Age":""}]
-    """
-    if not data or len(data) <= 1:
-        return []
-
-    header = data[0]
-    result = []
-
-    for row in data[1:]:
-        if len(row) >= 2:
-            item = {}
-            for i, field_name in enumerate(header):
-                if i < len(row):
-                    item[field_name] = str(row[i])
-                else:
-                    item[field_name] = ""
-            result.append(item)
-
-    return result
-
-
 def save_data_to_file(data, file_name):
     with open(file_name, "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=4)
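For reference, the removed helper's edge cases, per its body above: rows with
fewer than two elements are skipped, values beyond the header width are
ignored, and every kept value passes through str(). (Note that the docstring's
own example, where ["Bob"] yields {"Name": "Bob", "Age": ""}, disagrees with
the body, which skips one-element rows.) A quick check, assuming the function
above is still in scope:

    rows = [["Name", "Age"], ["Alice", 23], ["Bob", "30", "extra"], ["solo"]]
    print(list_of_list_to_dict(rows))
    # [{'Name': 'Alice', 'Age': '23'}, {'Name': 'Bob', 'Age': '30'}]
    # ["solo"] is dropped, 23 is stringified, and "extra" is ignored.

None of these implicit rules apply after this commit: each call site now
builds its dictionaries explicitly, so every field is spelled out at the
point of construction.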