cleaned code
Browse files
- lightrag/base.py +2 -0
- lightrag/lightrag.py +4 -2
- lightrag/llm.py +5 -1
- lightrag/operate.py +1 -1
- lightrag/utils.py +14 -10
lightrag/base.py
CHANGED
|
@@ -107,9 +107,11 @@ class BaseVectorStorage(StorageNameSpace):
|
|
| 107 |
raise NotImplementedError
|
| 108 |
|
| 109 |
async def delete_entity(self, entity_name: str) -> None:
|
|
|
|
| 110 |
raise NotImplementedError
|
| 111 |
|
| 112 |
async def delete_entity_relation(self, entity_name: str) -> None:
|
|
|
|
| 113 |
raise NotImplementedError
|
| 114 |
|
| 115 |
|
|
|
|
| 107 |
raise NotImplementedError
|
| 108 |
|
| 109 |
async def delete_entity(self, entity_name: str) -> None:
|
| 110 |
+
"""Delete a single entity by its name"""
|
| 111 |
raise NotImplementedError
|
| 112 |
|
| 113 |
async def delete_entity_relation(self, entity_name: str) -> None:
|
| 114 |
+
"""Delete relations for a given entity by scanning metadata"""
|
| 115 |
raise NotImplementedError
|
| 116 |
|
| 117 |
|
lightrag/lightrag.py
CHANGED
|
@@ -524,7 +524,6 @@ class LightRAG:
|
|
| 524 |
embedding_func=None,
|
| 525 |
)
|
| 526 |
|
| 527 |
-
# What's for, Is this nessisary ?
|
| 528 |
if self.llm_response_cache and hasattr(
|
| 529 |
self.llm_response_cache, "global_config"
|
| 530 |
):
|
|
@@ -1252,7 +1251,7 @@ class LightRAG:
|
|
| 1252 |
"""
|
| 1253 |
return await self.doc_status.get_status_counts()
|
| 1254 |
|
| 1255 |
-
async def adelete_by_doc_id(self, doc_id: str):
|
| 1256 |
"""Delete a document and all its related data
|
| 1257 |
|
| 1258 |
Args:
|
|
@@ -1269,6 +1268,9 @@ class LightRAG:
|
|
| 1269 |
|
| 1270 |
# 2. Get all related chunks
|
| 1271 |
chunks = await self.text_chunks.get_by_id(doc_id)
|
|
|
|
|
|
|
|
|
|
| 1272 |
chunk_ids = list(chunks.keys())
|
| 1273 |
logger.debug(f"Found {len(chunk_ids)} chunks to delete")
|
| 1274 |
|
|
|
|
| 524 |
embedding_func=None,
|
| 525 |
)
|
| 526 |
|
|
|
|
| 527 |
if self.llm_response_cache and hasattr(
|
| 528 |
self.llm_response_cache, "global_config"
|
| 529 |
):
|
|
|
|
| 1251 |
"""
|
| 1252 |
return await self.doc_status.get_status_counts()
|
| 1253 |
|
| 1254 |
+
async def adelete_by_doc_id(self, doc_id: str) -> None:
|
| 1255 |
"""Delete a document and all its related data
|
| 1256 |
|
| 1257 |
Args:
|
|
|
|
| 1268 |
|
| 1269 |
# 2. Get all related chunks
|
| 1270 |
chunks = await self.text_chunks.get_by_id(doc_id)
|
| 1271 |
+
if not chunks:
|
| 1272 |
+
return
|
| 1273 |
+
|
| 1274 |
chunk_ids = list(chunks.keys())
|
| 1275 |
logger.debug(f"Found {len(chunk_ids)} chunks to delete")
|
| 1276 |
|
lightrag/llm.py
CHANGED
|
@@ -66,7 +66,11 @@ class MultiModel:
|
|
| 66 |
return self._models[self._current_model]
|
| 67 |
|
| 68 |
async def llm_model_func(
|
| 69 |
-
self,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
) -> str:
|
| 71 |
kwargs.pop("model", None) # stop from overwriting the custom model name
|
| 72 |
kwargs.pop("keyword_extraction", None)
|
|
|
|
| 66 |
return self._models[self._current_model]
|
| 67 |
|
| 68 |
async def llm_model_func(
|
| 69 |
+
self,
|
| 70 |
+
prompt: str,
|
| 71 |
+
system_prompt: str | None = None,
|
| 72 |
+
history_messages: list[dict[str, Any]] = [],
|
| 73 |
+
**kwargs: Any,
|
| 74 |
) -> str:
|
| 75 |
kwargs.pop("model", None) # stop from overwriting the custom model name
|
| 76 |
kwargs.pop("keyword_extraction", None)
|
lightrag/operate.py
CHANGED
|
@@ -1608,7 +1608,7 @@ async def kg_query_with_keywords(
|
|
| 1608 |
query_param: QueryParam,
|
| 1609 |
global_config: dict[str, str],
|
| 1610 |
hashing_kv: BaseKVStorage | None = None,
|
| 1611 |
-
) -> str:
|
| 1612 |
"""
|
| 1613 |
Refactored kg_query that does NOT extract keywords by itself.
|
| 1614 |
It expects hl_keywords and ll_keywords to be set in query_param, or defaults to empty.
|
|
|
|
| 1608 |
query_param: QueryParam,
|
| 1609 |
global_config: dict[str, str],
|
| 1610 |
hashing_kv: BaseKVStorage | None = None,
|
| 1611 |
+
) -> str | AsyncIterator[str]:
|
| 1612 |
"""
|
| 1613 |
Refactored kg_query that does NOT extract keywords by itself.
|
| 1614 |
It expects hl_keywords and ll_keywords to be set in query_param, or defaults to empty.
|
lightrag/utils.py
CHANGED
|
@@ -9,7 +9,7 @@ import re
|
|
| 9 |
from dataclasses import dataclass
|
| 10 |
from functools import wraps
|
| 11 |
from hashlib import md5
|
| 12 |
-
from typing import Any, Union, List, Optional
|
| 13 |
import xml.etree.ElementTree as ET
|
| 14 |
import bs4
|
| 15 |
|
|
@@ -67,7 +67,7 @@ class EmbeddingFunc:
|
|
| 67 |
|
| 68 |
@dataclass
|
| 69 |
class ReasoningResponse:
|
| 70 |
-
reasoning_content: str
|
| 71 |
response_content: str
|
| 72 |
tag: str
|
| 73 |
|
|
@@ -109,7 +109,7 @@ def convert_response_to_json(response: str) -> dict[str, Any]:
|
|
| 109 |
raise e from None
|
| 110 |
|
| 111 |
|
| 112 |
-
def compute_args_hash(*args, cache_type: str = None) -> str:
|
| 113 |
"""Compute a hash for the given arguments.
|
| 114 |
Args:
|
| 115 |
*args: Arguments to hash
|
|
@@ -220,11 +220,13 @@ def clean_str(input: Any) -> str:
|
|
| 220 |
return re.sub(r"[\x00-\x1f\x7f-\x9f]", "", result)
|
| 221 |
|
| 222 |
|
| 223 |
-
def is_float_regex(value):
|
| 224 |
return bool(re.match(r"^[-+]?[0-9]*\.?[0-9]+$", value))
|
| 225 |
|
| 226 |
|
| 227 |
-
def truncate_list_by_token_size(
|
|
|
|
|
|
|
| 228 |
"""Truncate a list of data by token size"""
|
| 229 |
if max_token_size <= 0:
|
| 230 |
return []
|
|
@@ -334,7 +336,7 @@ def xml_to_json(xml_file):
|
|
| 334 |
return None
|
| 335 |
|
| 336 |
|
| 337 |
-
def process_combine_contexts(hl, ll):
|
| 338 |
header = None
|
| 339 |
list_hl = csv_string_to_list(hl.strip())
|
| 340 |
list_ll = csv_string_to_list(ll.strip())
|
|
@@ -640,7 +642,9 @@ def exists_func(obj, func_name: str) -> bool:
|
|
| 640 |
return False
|
| 641 |
|
| 642 |
|
| 643 |
-
def get_conversation_turns(
|
|
|
|
|
|
|
| 644 |
"""
|
| 645 |
Process conversation history to get the specified number of complete turns.
|
| 646 |
|
|
@@ -652,8 +656,8 @@ def get_conversation_turns(conversation_history: list[dict], num_turns: int) ->
|
|
| 652 |
Formatted string of the conversation history
|
| 653 |
"""
|
| 654 |
# Group messages into turns
|
| 655 |
-
turns = []
|
| 656 |
-
messages = []
|
| 657 |
|
| 658 |
# First, filter out keyword extraction messages
|
| 659 |
for msg in conversation_history:
|
|
@@ -687,7 +691,7 @@ def get_conversation_turns(conversation_history: list[dict], num_turns: int) ->
|
|
| 687 |
turns = turns[-num_turns:]
|
| 688 |
|
| 689 |
# Format the turns into a string
|
| 690 |
-
formatted_turns = []
|
| 691 |
for turn in turns:
|
| 692 |
formatted_turns.extend(
|
| 693 |
[f"user: {turn[0]['content']}", f"assistant: {turn[1]['content']}"]
|
|
|
|
| 9 |
from dataclasses import dataclass
|
| 10 |
from functools import wraps
|
| 11 |
from hashlib import md5
|
| 12 |
+
from typing import Any, Callable, Union, List, Optional
|
| 13 |
import xml.etree.ElementTree as ET
|
| 14 |
import bs4
|
| 15 |
|
|
|
|
| 67 |
|
| 68 |
@dataclass
|
| 69 |
class ReasoningResponse:
|
| 70 |
+
reasoning_content: str | None
|
| 71 |
response_content: str
|
| 72 |
tag: str
|
| 73 |
|
|
|
|
| 109 |
raise e from None
|
| 110 |
|
| 111 |
|
| 112 |
+
def compute_args_hash(*args: Any, cache_type: str | None = None) -> str:
|
| 113 |
"""Compute a hash for the given arguments.
|
| 114 |
Args:
|
| 115 |
*args: Arguments to hash
|
|
|
|
| 220 |
return re.sub(r"[\x00-\x1f\x7f-\x9f]", "", result)
|
| 221 |
|
| 222 |
|
| 223 |
+
def is_float_regex(value: str) -> bool:
|
| 224 |
return bool(re.match(r"^[-+]?[0-9]*\.?[0-9]+$", value))
|
| 225 |
|
| 226 |
|
| 227 |
+
def truncate_list_by_token_size(
|
| 228 |
+
list_data: list[Any], key: Callable[[Any], str], max_token_size: int
|
| 229 |
+
) -> list[int]:
|
| 230 |
"""Truncate a list of data by token size"""
|
| 231 |
if max_token_size <= 0:
|
| 232 |
return []
|
|
|
|
| 336 |
return None
|
| 337 |
|
| 338 |
|
| 339 |
+
def process_combine_contexts(hl: str, ll: str):
|
| 340 |
header = None
|
| 341 |
list_hl = csv_string_to_list(hl.strip())
|
| 342 |
list_ll = csv_string_to_list(ll.strip())
|
|
|
|
| 642 |
return False
|
| 643 |
|
| 644 |
|
| 645 |
+
def get_conversation_turns(
|
| 646 |
+
conversation_history: list[dict[str, Any]], num_turns: int
|
| 647 |
+
) -> str:
|
| 648 |
"""
|
| 649 |
Process conversation history to get the specified number of complete turns.
|
| 650 |
|
|
|
|
| 656 |
Formatted string of the conversation history
|
| 657 |
"""
|
| 658 |
# Group messages into turns
|
| 659 |
+
turns: list[list[dict[str, Any]]] = []
|
| 660 |
+
messages: list[dict[str, Any]] = []
|
| 661 |
|
| 662 |
# First, filter out keyword extraction messages
|
| 663 |
for msg in conversation_history:
|
|
|
|
| 691 |
turns = turns[-num_turns:]
|
| 692 |
|
| 693 |
# Format the turns into a string
|
| 694 |
+
formatted_turns: list[str] = []
|
| 695 |
for turn in turns:
|
| 696 |
formatted_turns.extend(
|
| 697 |
[f"user: {turn[0]['content']}", f"assistant: {turn[1]['content']}"]
|