Spaces:
Sleeping
Sleeping
from dataclasses import dataclass
@dataclass
class RAGConfig:
    """Default settings grouped by embedding, retrieval, LLM, paths and chunking.

    The ``@dataclass`` decorator is required here: it generates ``__init__``
    (so any field can be overridden by keyword argument) and it is what
    invokes ``__post_init__``. Without it, ``generation_kwargs`` would remain
    ``None`` on every instance.
    """

    # Embedding model
    embedding_model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
    normalize_embeddings: bool = True

    # Retrieval parameters
    top_k: int = 5
    similarity_threshold: float = 0.4

    # LLM model
    llm_model_name: str = "facebook/rag-token-base"
    llm_max_length: int = 512
    # ``None`` is a sentinel replaced per instance in ``__post_init__``; a dict
    # literal here would be rejected by ``dataclasses`` as a mutable default.
    generation_kwargs: dict = None

    # PDF paths
    pdf_dir: str = "data/pdfs"
    vector_db_path: str = "data/embeddings/vector_store.pkl"

    # Chunk configuration
    chunk_size: int = 500
    chunk_overlap: int = 100

    def __post_init__(self) -> None:
        """Fill in the default generation settings when none were supplied."""
        if self.generation_kwargs is None:
            # Build a fresh dict per instance so instances never share state.
            self.generation_kwargs = {
                "max_new_tokens": 200,
                "temperature": 0.7,
                "do_sample": True,
            }