YanSte committed on
Commit
049ff37
·
1 Parent(s): f428bda

added field

Browse files
Files changed (1) hide show
  1. lightrag/lightrag.py +9 -10
lightrag/lightrag.py CHANGED
@@ -231,12 +231,12 @@ class LightRAG:
231
  """LightRAG: Simple and Fast Retrieval-Augmented Generation."""
232
 
233
  working_dir: str = field(
234
- default_factory=lambda: f"./lightrag_cache_{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}"
235
  )
236
  """Directory where cache and temporary files are stored."""
237
 
238
  embedding_cache_config: dict[str, Any] = field(
239
- default_factory=lambda: {
240
  "enabled": False,
241
  "similarity_threshold": 0.95,
242
  "use_llm_check": False,
@@ -261,32 +261,31 @@ class LightRAG:
261
  """Storage type for tracking document processing statuses."""
262
 
263
  # Logging
264
- current_log_level = logger.level
265
- log_level: int = field(default=current_log_level)
266
  """Logging level for the system (e.g., 'DEBUG', 'INFO', 'WARNING')."""
267
 
268
  log_dir: str = field(default=os.getcwd())
269
  """Directory where logs are stored. Defaults to the current working directory."""
270
 
271
  # Text chunking
272
- chunk_token_size: int = int(os.getenv("CHUNK_SIZE", "1200"))
273
  """Maximum number of tokens per text chunk when splitting documents."""
274
 
275
- chunk_overlap_token_size: int = int(os.getenv("CHUNK_OVERLAP_SIZE", "100"))
276
  """Number of overlapping tokens between consecutive text chunks to preserve context."""
277
 
278
- tiktoken_model_name: str = "gpt-4o-mini"
279
  """Model name used for tokenization when chunking text."""
280
 
281
  # Entity extraction
282
- entity_extract_max_gleaning: int = 1
283
  """Maximum number of entity extraction attempts for ambiguous content."""
284
 
285
- entity_summary_to_max_tokens: int = int(os.getenv("MAX_TOKEN_SUMMARY", "500"))
286
  """Maximum number of tokens used for summarizing extracted entities."""
287
 
288
  # Node embedding
289
- node_embedding_algorithm: str = "node2vec"
290
  """Algorithm used for node embedding in knowledge graphs."""
291
 
292
  node2vec_params: dict[str, int] = field(
 
231
  """LightRAG: Simple and Fast Retrieval-Augmented Generation."""
232
 
233
  working_dir: str = field(
234
+ default=f"./lightrag_cache_{datetime.now().strftime('%Y-%m-%d-%H:%M:%S')}"
235
  )
236
  """Directory where cache and temporary files are stored."""
237
 
238
  embedding_cache_config: dict[str, Any] = field(
239
+ default={
240
  "enabled": False,
241
  "similarity_threshold": 0.95,
242
  "use_llm_check": False,
 
261
  """Storage type for tracking document processing statuses."""
262
 
263
  # Logging
264
+ log_level: int = field(default=logger.level)
 
265
  """Logging level for the system (e.g., 'DEBUG', 'INFO', 'WARNING')."""
266
 
267
  log_dir: str = field(default=os.getcwd())
268
  """Directory where logs are stored. Defaults to the current working directory."""
269
 
270
  # Text chunking
271
+ chunk_token_size: int = field(default=int(os.getenv("CHUNK_SIZE", 1200)))
272
  """Maximum number of tokens per text chunk when splitting documents."""
273
 
274
+ chunk_overlap_token_size: int = field(default=int(os.getenv("CHUNK_OVERLAP_SIZE", 100)))
275
  """Number of overlapping tokens between consecutive text chunks to preserve context."""
276
 
277
+ tiktoken_model_name: str = field(default="gpt-4o-mini")
278
  """Model name used for tokenization when chunking text."""
279
 
280
  # Entity extraction
281
+ entity_extract_max_gleaning: int = field(default=1)
282
  """Maximum number of entity extraction attempts for ambiguous content."""
283
 
284
+ entity_summary_to_max_tokens: int = field(default=int(os.getenv("MAX_TOKEN_SUMMARY", 500)))
285
  """Maximum number of tokens used for summarizing extracted entities."""
286
 
287
  # Node embedding
288
+ node_embedding_algorithm: str = field(default="node2vec")
289
  """Algorithm used for node embedding in knowledge graphs."""
290
 
291
  node2vec_params: dict[str, int] = field(