Remove list_of_list_to_dict function
lightrag/operate.py  +71 -111
lightrag/utils.py    +0 -38
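In short: each retrieval helper previously accumulated a header row plus data
rows (a list of lists) and converted the table with list_of_list_to_dict; it
now appends dictionaries directly. A minimal sketch of the two styles, using a
stand-in `chunks` iterable rather than the repository's actual variables:

    # Old style: build a table whose first row is the header, then
    # convert it into a list of dicts with the (now removed) helper.
    rows = [["id", "content", "file_path"]]
    for i, chunk in enumerate(chunks):
        rows.append([i + 1, chunk["content"], chunk["file_path"]])
    text_units_context = list_of_list_to_dict(rows)

    # New style: build the list of dicts directly; no helper needed.
    text_units_context = [
        {"id": i + 1, "content": chunk["content"], "file_path": chunk["file_path"]}
        for i, chunk in enumerate(chunks)
    ]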
lightrag/operate.py
CHANGED

@@ -25,7 +25,6 @@ from .utils import (
     CacheData,
     get_conversation_turns,
     use_llm_func_with_cache,
-    list_of_list_to_dict,
 )
 from .base import (
     BaseGraphStorage,
@@ -1175,22 +1174,17 @@ async def _get_vector_context(
         entities_context = []
         relations_context = []
 
-        # Create text_units_context
-        text_units_section_list = [["id", "content", "file_path"]]
-
+        # Create text_units_context directly as a list of dictionaries
+        text_units_context = []
         for i, chunk in enumerate(maybe_trun_chunks):
-            # Add to text units list
-            text_units_section_list.append(
-                [
-                    i + 1,
-                    chunk["content"],
-                    chunk["file_path"],
-                ]
+            text_units_context.append(
+                {
+                    "id": i + 1,
+                    "content": chunk["content"],
+                    "file_path": chunk["file_path"],
+                }
             )
 
-        # Convert to dictionary format using list_of_list_to_dict
-        text_units_context = list_of_list_to_dict(text_units_section_list)
-
         return entities_context, relations_context, text_units_context
     except Exception as e:
         logger.error(f"Error in _get_vector_context: {e}")
@@ -1398,17 +1392,7 @@ async def _get_node_data(
     )
 
     # build prompt
-    entites_section_list = [
-        [
-            "id",
-            "entity",
-            "type",
-            "description",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+    entities_context = []
     for i, n in enumerate(node_datas):
         created_at = n.get("created_at", "UNKNOWN")
         if isinstance(created_at, (int, float)):
@@ -1417,32 +1401,19 @@ async def _get_node_data(
         # Get file path from node data
         file_path = n.get("file_path", "unknown_source")
 
-        entites_section_list.append(
-            [
-                i + 1,
-                n["entity_name"],
-                n.get("entity_type", "UNKNOWN"),
-                n.get("description", "UNKNOWN"),
-                n["rank"],
-                created_at,
-                file_path,
-            ]
+        entities_context.append(
+            {
+                "id": i + 1,
+                "entity": n["entity_name"],
+                "type": n.get("entity_type", "UNKNOWN"),
+                "description": n.get("description", "UNKNOWN"),
+                "rank": n["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    entities_context = list_of_list_to_dict(entites_section_list)
-
-    relations_section_list = [
-        [
-            "id",
-            "entity1",
-            "entity2",
-            "description",
-            "keywords",
-            "weight",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+
+    relations_context = []
     for i, e in enumerate(use_relations):
         created_at = e.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1452,27 +1423,29 @@ async def _get_node_data(
         # Get file path from edge data
         file_path = e.get("file_path", "unknown_source")
 
-        relations_section_list.append(
-            [
-                i + 1,
-                e["src_tgt"][0],
-                e["src_tgt"][1],
-                e["description"],
-                e["keywords"],
-                e["weight"],
-                e["rank"],
-                created_at,
-                file_path,
-            ]
+        relations_context.append(
+            {
+                "id": i + 1,
+                "entity1": e["src_tgt"][0],
+                "entity2": e["src_tgt"][1],
+                "description": e["description"],
+                "keywords": e["keywords"],
+                "weight": e["weight"],
+                "rank": e["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    relations_context = list_of_list_to_dict(relations_section_list)
 
-    text_units_section_list = [["id", "content", "file_path"]]
+    text_units_context = []
     for i, t in enumerate(use_text_units):
-        text_units_section_list.append(
-            [i + 1, t["content"], t["file_path"]]
+        text_units_context.append(
+            {
+                "id": i + 1,
+                "content": t["content"],
+                "file_path": t.get("file_path", "unknown_source"),
+            }
         )
-    text_units_context = list_of_list_to_dict(text_units_section_list)
     return entities_context, relations_context, text_units_context
 
 
@@ -1715,19 +1688,7 @@ async def _get_edge_data(
         f"Global query uses {len(use_entities)} entites, {len(edge_datas)} relations, {len(use_text_units)} chunks"
     )
 
-    relations_section_list = [
-        [
-            "id",
-            "entity1",
-            "entity2",
-            "description",
-            "keywords",
-            "weight",
-            "rank",
-            "created_at",
-            "file_path",
-        ]
-    ]
+    relations_context = []
    for i, e in enumerate(edge_datas):
         created_at = e.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1737,24 +1698,21 @@ async def _get_edge_data(
         # Get file path from edge data
         file_path = e.get("file_path", "unknown_source")
 
-        relations_section_list.append(
-            [
-                i + 1,
-                e["src_id"],
-                e["tgt_id"],
-                e["description"],
-                e["keywords"],
-                e["weight"],
-                e["rank"],
-                created_at,
-                file_path,
-            ]
+        relations_context.append(
+            {
+                "id": i + 1,
+                "entity1": e["src_id"],
+                "entity2": e["tgt_id"],
+                "description": e["description"],
+                "keywords": e["keywords"],
+                "weight": e["weight"],
+                "rank": e["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    relations_context = list_of_list_to_dict(relations_section_list)
 
-    entites_section_list = [
-        ["id", "entity", "type", "description", "rank", "created_at", "file_path"]
-    ]
+    entities_context = []
     for i, n in enumerate(use_entities):
         created_at = n.get("created_at", "UNKNOWN")
         # Convert timestamp to readable format
@@ -1764,25 +1722,27 @@ async def _get_edge_data(
         # Get file path from node data
         file_path = n.get("file_path", "unknown_source")
 
-        entites_section_list.append(
-            [
-                i + 1,
-                n["entity_name"],
-                n.get("entity_type", "UNKNOWN"),
-                n.get("description", "UNKNOWN"),
-                n["rank"],
-                created_at,
-                file_path,
-            ]
+        entities_context.append(
+            {
+                "id": i + 1,
+                "entity": n["entity_name"],
+                "type": n.get("entity_type", "UNKNOWN"),
+                "description": n.get("description", "UNKNOWN"),
+                "rank": n["rank"],
+                "created_at": created_at,
+                "file_path": file_path,
+            }
         )
-    entities_context = list_of_list_to_dict(entites_section_list)
 
-    text_units_section_list = [["id", "content", "file_path"]]
+    text_units_context = []
     for i, t in enumerate(use_text_units):
-        text_units_section_list.append(
-            [i + 1, t["content"], t["file_path"]]
+        text_units_context.append(
+            {
+                "id": i + 1,
+                "content": t["content"],
+                "file_path": t.get("file_path", "unknown"),
+            }
         )
-    text_units_context = list_of_list_to_dict(text_units_section_list)
     return entities_context, relations_context, text_units_context
 
 
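One behavioral difference worth noting: the removed helper coerced every cell
with str() (see its body in the utils.py diff below), so values such as "id",
"weight", and "rank" came back as strings, while the direct construction keeps
their native types. A small illustration with a hypothetical node record:

    # Hypothetical record, for illustration only.
    n = {"entity_name": "Alice", "entity_type": "person", "rank": 7}

    # Old pipeline (via list_of_list_to_dict): {"id": "1", ..., "rank": "7"}
    # New construction: native types are preserved.
    entry = {"id": 1, "entity": n["entity_name"], "rank": n["rank"]}
    assert isinstance(entry["id"], int) and isinstance(entry["rank"], int)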
lightrag/utils.py
CHANGED

@@ -719,44 +719,6 @@ def truncate_list_by_token_size(
     return list_data
 
 
-def list_of_list_to_dict(data: list[list[str]]) -> list[dict[str, str]]:
-    """Convert a 2D string list (table-like data) into a list of dictionaries.
-
-    The first row is treated as header containing field names. Subsequent rows become
-    dictionary entries where keys come from header and values from row data.
-
-    Args:
-        data: 2D string array where first row contains headers and rest are data rows.
-              Minimum 2 columns required in data rows (rows with <2 elements are skipped).
-
-    Returns:
-        List of dictionaries where each dict represents a data row with:
-        - Keys: Header values from first row
-        - Values: Corresponding row values (empty string if missing)
-
-    Example:
-        Input:  [["Name","Age"], ["Alice","23"], ["Bob"]]
-        Output: [{"Name":"Alice","Age":"23"}, {"Name":"Bob","Age":""}]
-    """
-    if not data or len(data) <= 1:
-        return []
-
-    header = data[0]
-    result = []
-
-    for row in data[1:]:
-        if len(row) >= 2:
-            item = {}
-            for i, field_name in enumerate(header):
-                if i < len(row):
-                    item[field_name] = str(row[i])
-                else:
-                    item[field_name] = ""
-            result.append(item)
-
-    return result
-
-
 def save_data_to_file(data, file_name):
     with open(file_name, "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=4)
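For reference, the removed helper's edge cases, per its body above: rows with
fewer than two elements are skipped, values beyond the header width are
ignored, and every kept value passes through str(). (Note that the docstring's
own example, where ["Bob"] yields {"Name": "Bob", "Age": ""}, disagrees with
the body, which skips one-element rows.) A quick check, assuming the function
above is still in scope:

    rows = [["Name", "Age"], ["Alice", 23], ["Bob", "30", "extra"], ["solo"]]
    print(list_of_list_to_dict(rows))
    # [{'Name': 'Alice', 'Age': '23'}, {'Name': 'Bob', 'Age': '30'}]
    # ["solo"] is dropped, 23 is stringified, and "extra" is ignored.

None of these implicit rules apply after this commit: each call site now
builds its dictionaries explicitly, so every field is spelled out at the
point of construction.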