added field

lightrag/lightrag.py (+9 -10)
```diff
@@ -231,12 +231,12 @@ class LightRAG:
     """LightRAG: Simple and Fast Retrieval-Augmented Generation."""
 
     working_dir: str = field(
-        default_factory=lambda: f"./lightrag_cache_{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}"
+        default=f"./lightrag_cache_{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}"
     )
     """Directory where cache and temporary files are stored."""
 
     embedding_cache_config: dict[str, Any] = field(
-        default_factory=lambda: {
+        default={
             "enabled": False,
             "similarity_threshold": 0.95,
             "use_llm_check": False,
```
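This hunk moves `working_dir` and `embedding_cache_config` from `default_factory` to a plain `default`. The difference matters: a `default` expression is evaluated once, when the module is imported, so every instance shares the same value, and Python's dataclasses reject a mutable `default` such as a dict literal outright. A minimal standalone sketch (not LightRAG code) of both behaviors:

```python
from dataclasses import dataclass, field
from datetime import datetime


@dataclass
class WithDefault:
    # `default=` is evaluated once, when the class is defined (import time),
    # so every instance shares the same timestamp.
    path: str = field(default=f"./cache_{datetime.now().strftime('%H%M%S')}")


@dataclass
class WithFactory:
    # `default_factory=` runs for each instance: fresh timestamp per object.
    path: str = field(
        default_factory=lambda: f"./cache_{datetime.now().strftime('%H%M%S')}"
    )


a, b = WithDefault(), WithDefault()
assert a.path == b.path  # identical: the f-string ran exactly once

# Mutable values must use default_factory; a dict passed as `default=`
# is rejected while the @dataclass decorator processes the class:
try:

    @dataclass
    class BadConfig:
        cfg: dict = field(default={"enabled": False})

except ValueError as err:
    print(err)  # "mutable default <class 'dict'> for field cfg is not allowed..."
```

As written, `default={...}` for `embedding_cache_config` would hit exactly this `ValueError` when the class is defined; keeping a `default_factory` is the safe form for the dict.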
```diff
@@ -261,32 +261,31 @@ class LightRAG:
     """Storage type for tracking document processing statuses."""
 
     # Logging
-    current_log_level = logger.level
-    log_level: int = field(default=current_log_level)
+    log_level: int = field(default=logger.level)
     """Logging level for the system (e.g., 'DEBUG', 'INFO', 'WARNING')."""
 
     log_dir: str = field(default=os.getcwd())
     """Directory where logs are stored. Defaults to the current working directory."""
 
     # Text chunking
-    chunk_token_size: int = int(os.getenv("CHUNK_SIZE", 1200))
+    chunk_token_size: int = field(default=int(os.getenv("CHUNK_SIZE", 1200)))
     """Maximum number of tokens per text chunk when splitting documents."""
 
-    chunk_overlap_token_size: int = int(os.getenv("CHUNK_OVERLAP_SIZE", 100))
+    chunk_overlap_token_size: int = field(default=int(os.getenv("CHUNK_OVERLAP_SIZE", 100)))
     """Number of overlapping tokens between consecutive text chunks to preserve context."""
 
-    tiktoken_model_name: str = "gpt-4o-mini"
+    tiktoken_model_name: str = field(default="gpt-4o-mini")
     """Model name used for tokenization when chunking text."""
 
     # Entity extraction
-    entity_extract_max_gleaning: int = 1
+    entity_extract_max_gleaning: int = field(default=1)
    """Maximum number of entity extraction attempts for ambiguous content."""
 
-    entity_summary_to_max_tokens: int = int(os.getenv("MAX_TOKEN_SUMMARY", 500))
+    entity_summary_to_max_tokens: int = field(default=int(os.getenv("MAX_TOKEN_SUMMARY", 500)))
     """Maximum number of tokens used for summarizing extracted entities."""
 
     # Node embedding
-    node_embedding_algorithm: str = "node2vec"
+    node_embedding_algorithm: str = field(default="node2vec")
     """Algorithm used for node embedding in knowledge graphs."""
 
     node2vec_params: dict[str, int] = field(
```
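The remaining edits wrap bare class attributes in `field(default=...)`, which is what the commit title refers to. The wrapped expressions (`logger.level`, `os.getcwd()`, and the `os.getenv(...)` lookups) are still evaluated exactly once, at import time, so environment overrides must be in place before `lightrag` is imported; per-instance overrides go through the dataclass constructor as usual. A hedged usage sketch, assuming the `from lightrag import LightRAG` import path from the project README and omitting any model/embedding arguments the real class may need:

```python
import os

# Environment overrides must exist before the module is imported, because the
# os.getenv(...) defaults above are read exactly once, at import time.
os.environ["CHUNK_SIZE"] = "800"
os.environ["CHUNK_OVERLAP_SIZE"] = "50"

from lightrag import LightRAG  # assumed export; matches the project README

# Defaults resolved from the environment at import time.
rag = LightRAG(working_dir="./my_cache")

# Explicit keyword arguments still beat any default, env-derived or not.
small = LightRAG(
    working_dir="./my_cache_small",
    chunk_token_size=400,
    chunk_overlap_token_size=20,
)
```

For immutable values, `x: int = field(default=1)` behaves the same as `x: int = 1`; the `field(...)` wrapper mainly standardizes the declarations and leaves room to switch to `default_factory` or add per-field metadata later.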