cleaned code
Browse files
- lightrag/lightrag.py +2 -1
- lightrag/operate.py +2 -4
- lightrag/prompt.py +8 -10
lightrag/lightrag.py
CHANGED
@@ -687,7 +687,7 @@ class LightRAG:
|
|
687 |
return
|
688 |
|
689 |
update_storage = True
|
690 |
-
logger.info(f"
|
691 |
|
692 |
inserting_chunks: dict[str, Any] = {}
|
693 |
for chunk_text in text_chunks:
|
@@ -914,6 +914,7 @@ class LightRAG:
|
|
914 |
if storage_inst is not None
|
915 |
]
|
916 |
await asyncio.gather(*tasks)
|
|
|
917 |
|
918 |
def insert_custom_kg(self, custom_kg: dict[str, Any]) -> None:
|
919 |
loop = always_get_an_event_loop()
|
|
|
687 |
return
|
688 |
|
689 |
update_storage = True
|
690 |
+
logger.info(f"Inserting {len(new_docs)} docs")
|
691 |
|
692 |
inserting_chunks: dict[str, Any] = {}
|
693 |
for chunk_text in text_chunks:
|
|
|
914 |
if storage_inst is not None
|
915 |
]
|
916 |
await asyncio.gather(*tasks)
|
917 |
+
logger.info("All Insert done")
|
918 |
|
919 |
def insert_custom_kg(self, custom_kg: dict[str, Any]) -> None:
|
920 |
loop = always_get_an_event_loop()
|
lightrag/operate.py
CHANGED
@@ -491,11 +491,9 @@ async def extract_entities(
|
|
491 |
already_processed += 1
|
492 |
already_entities += len(maybe_nodes)
|
493 |
already_relations += len(maybe_edges)
|
494 |
-
|
495 |
-
already_processed % len(PROMPTS["process_tickers"])
|
496 |
-
]
|
497 |
logger.debug(
|
498 |
-
f"
|
499 |
)
|
500 |
return dict(maybe_nodes), dict(maybe_edges)
|
501 |
|
|
|
491 |
already_processed += 1
|
492 |
already_entities += len(maybe_nodes)
|
493 |
already_relations += len(maybe_edges)
|
494 |
+
|
|
|
|
|
495 |
logger.debug(
|
496 |
+
f"Processed {already_processed} chunks, {already_entities} entities(duplicated), {already_relations} relations(duplicated)\r",
|
497 |
)
|
498 |
return dict(maybe_nodes), dict(maybe_edges)
|
499 |
|
lightrag/prompt.py
CHANGED
@@ -9,15 +9,14 @@ PROMPTS["DEFAULT_LANGUAGE"] = "English"
|
|
9 |
PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
|
10 |
PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
|
11 |
PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
|
12 |
-
PROMPTS["process_tickers"] = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
|
13 |
|
14 |
PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event", "category"]
|
15 |
|
16 |
-
PROMPTS["entity_extraction"] = """
|
17 |
Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.
|
18 |
Use {language} as output language.
|
19 |
|
20 |
-
|
21 |
1. Identify all entities. For each identified entity, extract the following information:
|
22 |
- entity_name: Name of the entity, use same language as input text. If English, capitalized the name.
|
23 |
- entity_type: One of the following types: [{entity_types}]
|
@@ -41,18 +40,17 @@ Format the content-level key words as ("content_keywords"{tuple_delimiter}<high_
|
|
41 |
5. When finished, output {completion_delimiter}
|
42 |
|
43 |
######################
|
44 |
-
|
45 |
######################
|
46 |
{examples}
|
47 |
|
48 |
#############################
|
49 |
-
|
50 |
######################
|
51 |
Entity_types: {entity_types}
|
52 |
Text: {input_text}
|
53 |
######################
|
54 |
-
Output:
|
55 |
-
"""
|
56 |
|
57 |
PROMPTS["entity_extraction_examples"] = [
|
58 |
"""Example 1:
|
@@ -137,7 +135,7 @@ Make sure it is written in third person, and include the entity names so we the
|
|
137 |
Use {language} as output language.
|
138 |
|
139 |
#######
|
140 |
-
|
141 |
Entities: {entity_name}
|
142 |
Description List: {description_list}
|
143 |
#######
|
@@ -205,12 +203,12 @@ Given the query and conversation history, list both high-level and low-level key
|
|
205 |
- "low_level_keywords" for specific entities or details
|
206 |
|
207 |
######################
|
208 |
-
|
209 |
######################
|
210 |
{examples}
|
211 |
|
212 |
#############################
|
213 |
-
|
214 |
######################
|
215 |
Conversation History:
|
216 |
{history}
|
|
|
9 |
PROMPTS["DEFAULT_TUPLE_DELIMITER"] = "<|>"
|
10 |
PROMPTS["DEFAULT_RECORD_DELIMITER"] = "##"
|
11 |
PROMPTS["DEFAULT_COMPLETION_DELIMITER"] = "<|COMPLETE|>"
|
|
|
12 |
|
13 |
PROMPTS["DEFAULT_ENTITY_TYPES"] = ["organization", "person", "geo", "event", "category"]
|
14 |
|
15 |
+
PROMPTS["entity_extraction"] = """---Goal---
|
16 |
Given a text document that is potentially relevant to this activity and a list of entity types, identify all entities of those types from the text and all relationships among the identified entities.
|
17 |
Use {language} as output language.
|
18 |
|
19 |
+
---Steps---
|
20 |
1. Identify all entities. For each identified entity, extract the following information:
|
21 |
- entity_name: Name of the entity, use same language as input text. If English, capitalized the name.
|
22 |
- entity_type: One of the following types: [{entity_types}]
|
|
|
40 |
5. When finished, output {completion_delimiter}
|
41 |
|
42 |
######################
|
43 |
+
---Examples---
|
44 |
######################
|
45 |
{examples}
|
46 |
|
47 |
#############################
|
48 |
+
---Real Data---
|
49 |
######################
|
50 |
Entity_types: {entity_types}
|
51 |
Text: {input_text}
|
52 |
######################
|
53 |
+
Output:"""
|
|
|
54 |
|
55 |
PROMPTS["entity_extraction_examples"] = [
|
56 |
"""Example 1:
|
|
|
135 |
Use {language} as output language.
|
136 |
|
137 |
#######
|
138 |
+
---Data---
|
139 |
Entities: {entity_name}
|
140 |
Description List: {description_list}
|
141 |
#######
|
|
|
203 |
- "low_level_keywords" for specific entities or details
|
204 |
|
205 |
######################
|
206 |
+
---Examples---
|
207 |
######################
|
208 |
{examples}
|
209 |
|
210 |
#############################
|
211 |
+
---Real Data---
|
212 |
######################
|
213 |
Conversation History:
|
214 |
{history}
|