RAG_Eval / evaluation /config.py
Rom89823974978's picture
Resolved tests issues
79bdbbe
"""Typed configuration objects used across the framework."""
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional, Literal, Union
@dataclass
class LoggingConfig:
    """Logging configuration (rotating file + console).

    Attributes:
        log_dir: Directory that holds the log files. A plain string is
            accepted and converted to ``Path`` on construction.
        level: Level name, one of DEBUG | INFO | WARNING | ERROR | CRITICAL.
        max_mb: Per-file size before rotation (presumably megabytes, going
            by the field name -- confirm against the logging setup code).
        backups: Number of rotated files to keep.
    """

    log_dir: Path = Path("logs")
    level: str = "INFO"
    max_mb: int = 5
    backups: int = 5

    def __post_init__(self) -> None:
        # Values loaded from a CLI / YAML / JSON source arrive as strings.
        # Normalise them so consumers can rely on the Path API that the
        # annotation promises. Anything else is left exactly as given.
        if isinstance(self.log_dir, str):
            self.log_dir = Path(self.log_dir)
@dataclass
class CrossEncoderConfig:
    """Settings for the optional cross-encoder re-ranking stage.

    Attributes:
        enable: Master switch; the re-ranker is off by default.
        model_name: Identifier of the cross-encoder model.
        device: Device string the model is placed on.
        max_length: Truncation length.
        first_stage_k: How many documents are passed to the re-ranker.
        final_k: When set, overrides ``PipelineConfig.retriever.top_k``.
    """

    enable: bool = False
    model_name: str = "cross-encoder/ms-marco-MiniLM-L-6-v2"
    device: str = "cpu"
    max_length: int = 512
    first_stage_k: int = 50
    final_k: Optional[int] = None
@dataclass
class RetrieverConfig:
    """Configuration for a retriever back-end.

    Attributes:
        name: Which back-end to use: "bm25", "dense" or "hybrid".
        top_k: Number of results requested from the retriever.
        index_path: Legacy alias for ``bm25_idx``, kept for backward
            compatibility with tests. Used only when ``bm25_idx`` is unset.
        bm25_idx: BM25 index location.
        doc_store: Document store location (BM25).
        faiss_index: FAISS index location (dense).
        model_name: Embedding model identifier (dense).
        embedder_cache: Optional cache location for the embedder (dense).
        device: Device string for the embedding model.
        alpha: Sparse <-> dense weight (hybrid).
    """

    name: Literal["bm25", "dense", "hybrid"] = "bm25"
    top_k: int = 5

    # Backward compatibility with tests: legacy alias for ``bm25_idx``.
    index_path: Optional[Union[str, Path]] = None

    # Specific to BM25
    bm25_idx: Optional[Union[str, Path]] = None
    doc_store: Optional[Union[str, Path]] = None

    # For dense-only
    faiss_index: Optional[Union[str, Path]] = None
    model_name: str = "sentence-transformers/all-MiniLM-L6-v2"
    embedder_cache: Optional[Union[str, Path]] = None
    device: str = "cpu"

    # For hybrid only
    alpha: float = 0.5  # sparse <-> dense weight

    def __post_init__(self) -> None:
        # The legacy alias only fills the canonical field when the caller
        # did not set it; an explicit ``bm25_idx`` must never be silently
        # replaced by the deprecated spelling. An empty ``index_path`` is
        # still treated as "not provided", as before.
        if self.bm25_idx is None and self.index_path:
            self.bm25_idx = self.index_path
@dataclass
class GeneratorConfig:
    """Settings for the text generator.

    Attributes:
        model_name: Identifier of the generator model.
        device: Device string the model is placed on.
        max_new_tokens: Value for the generation length limit.
        temperature: Sampling temperature; defaults to 0.0.
    """

    model_name: str = "google/flan-t5-base"
    device: str = "cpu"
    max_new_tokens: int = 256
    temperature: float = 0.0
@dataclass
class StatsConfig:
    """Settings for statistical tests and robustness analyses.

    The fields are grouped by the research questions (RQ) they serve.
    """

    # --- Correlation (RQ1 & RQ2) ---------------------------------------
    correlation_method: Literal["spearman", "kendall"] = "spearman"
    # Bootstrap replicates used for confidence intervals.
    n_boot: int = 1000
    # Confidence level, e.g. 0.95 means 95 %.
    ci: float = 0.95

    # --- Significance tests (RQ2) --------------------------------------
    wilcoxon_alternative: Literal["two-sided", "greater", "less"] = "two-sided"
    multiple_correction: Literal["holm-bonferroni", "none"] = "holm-bonferroni"
    # Family-wise error rate.
    alpha: float = 0.05

    # --- Robustness / sensitivity (RQ3 & RQ4) --------------------------
    compute_effect_size: bool = True
    n_permutations: int = 1000
    failure_threshold: float = 0.0
@dataclass
class PipelineConfig:
    """Top-level pipeline configuration.

    Bundles one config object per pipeline component. Each field uses a
    ``default_factory`` so every ``PipelineConfig`` instance gets its own
    freshly built sub-config rather than sharing one mutable default.
    """

    logging: LoggingConfig = field(default_factory=LoggingConfig)
    reranker: CrossEncoderConfig = field(default_factory=CrossEncoderConfig)
    retriever: RetrieverConfig = field(default_factory=RetrieverConfig)
    generator: GeneratorConfig = field(default_factory=GeneratorConfig)
    stats: StatsConfig = field(default_factory=StatsConfig)