Spaces:
Sleeping
Sleeping
| """ | |
| models.py — Data classes used across the pipeline. | |
| """ | |
| from dataclasses import dataclass, field | |
| from typing import Dict, List, Optional, Set, Tuple | |
# @dataclass generates __init__/__repr__/__eq__ from the annotated fields;
# without it the class cannot be constructed with its attributes.
@dataclass
class Symbol:
    """A single extracted symbol (function/method).

    The four string fields are required; ``lineno`` defaults to 0.
    """

    id: str  # "./path.py:ClassName.method" or "./path.py:func"
    file: str  # absolute path to source file
    name: str  # bare name like "ClassName.method" or "func"
    code: str  # source code text
    lineno: int = 0  # start line number
# @dataclass is required here: the field(default_factory=...) defaults only
# become per-instance containers when the decorator processes the class.
@dataclass
class RepositoryIndex:
    """Complete index of a repository.

    Every field defaults to a fresh empty container, so ``RepositoryIndex()``
    yields an empty index that can be filled in incrementally.
    """

    symbols: Dict[str, "Symbol"] = field(default_factory=dict)  # id -> Symbol
    graph: Dict[str, List[str]] = field(default_factory=dict)  # id -> [dependency ids]
    broken_files: List[str] = field(default_factory=list)

    def reverse_graph(self) -> Dict[str, Set[str]]:
        """Build reverse graph (callers of each symbol).

        Returns:
            A new dict mapping each id that appears as a dependency in
            ``graph`` to the set of ids that list it. Ids that nobody
            depends on get no entry.
        """
        callers_of: Dict[str, Set[str]] = {}
        for caller, callees in self.graph.items():
            for callee in callees:
                callers_of.setdefault(callee, set()).add(caller)
        return callers_of

    def total_edges(self) -> int:
        """Return the number of dependency entries summed over all of ``graph``.

        Duplicate ids within one dependency list are counted each time.
        """
        return sum(len(deps) for deps in self.graph.values())
# @dataclass turns the field(default_factory=list) declarations into
# per-instance lists; without it they would be shared Field objects.
@dataclass
class DiffResult:
    """Result of comparing two states."""

    modified: List[str] = field(default_factory=list)
    added: List[str] = field(default_factory=list)
    deleted: List[str] = field(default_factory=list)
    broken_files: List[str] = field(default_factory=list)

    def all_changed(self) -> List[str]:
        """Return a new list of ``modified`` followed by ``added``.

        ``deleted`` and ``broken_files`` are not included.
        """
        return self.modified + self.added
# @dataclass is needed so the field(default_factory=...) defaults become
# real per-instance lists/dicts that all_relevant() can iterate.
@dataclass
class ImpactResult:
    """Result of impact analysis."""

    changed: List[str] = field(default_factory=list)
    blast_radius: List[str] = field(default_factory=list)
    dependencies: List[str] = field(default_factory=list)
    scores: Dict[str, float] = field(default_factory=dict)

    def all_relevant(self) -> List[str]:
        """All symbols that should be in context, deduplicated, ordered by score.

        Returns:
            Every key of ``scores`` in descending score order (ties keep
            their insertion order in ``scores``), followed by any ids from
            ``changed``, ``blast_radius`` and ``dependencies`` (in that
            order) that have no score. Each id appears once.
        """
        # Dict keys are already unique, so the scored pass needs no dedup;
        # sorted() is stable, so equal scores keep their original order.
        ordered = sorted(self.scores, key=self.scores.get, reverse=True)
        seen = set(ordered)

        # Any remaining that weren't scored
        for sym_id in self.changed + self.blast_radius + self.dependencies:
            if sym_id not in seen:
                seen.add(sym_id)
                ordered.append(sym_id)
        return ordered
# @dataclass supplies the constructor for the required fields below and
# makes the field(default_factory=list) defaults per-instance lists.
@dataclass
class ContextPackage:
    """Final compiled context for an LLM."""

    text: str
    symbol_count: int
    token_estimate: int
    total_repo_tokens: int

    # LLM self-awareness fields
    dropped_symbols: List[str] = field(default_factory=list)  # scored but cut by budget
    skipped_files: List[str] = field(default_factory=list)  # SyntaxError'd files
    graph_confidence: float = 1.0  # fraction of edges that resolved

    def reduction_pct(self) -> float:
        """Return how much smaller ``token_estimate`` is than ``total_repo_tokens``.

        Returns:
            ``(1 - token_estimate / total_repo_tokens) * 100``, or ``0.0``
            when ``total_repo_tokens`` is 0 (avoids division by zero).
        """
        if self.total_repo_tokens == 0:
            return 0.0
        return (1 - self.token_estimate / self.total_repo_tokens) * 100
# @dataclass generates the constructor for the three required fields and
# applies the defaults declared on the rest.
@dataclass
class BenchmarkResult:
    """Result of a single benchmark run.

    ``name``, ``repo_path`` and ``changed_functions`` are required; every
    other field has a default.
    """

    name: str
    repo_path: str
    changed_functions: List[str]

    # Graph stats
    total_symbols: int = 0
    total_edges: int = 0
    graph_build_ms: float = 0.0

    # Retrieval stats
    retrieved_count: int = 0
    retrieved_ids: List[str] = field(default_factory=list)

    # Token stats
    total_tokens: int = 0
    context_tokens: int = 0
    token_reduction_pct: float = 0.0
    function_reduction_pct: float = 0.0

    # Precision/Recall (when ground truth available)
    precision: Optional[float] = None
    recall: Optional[float] = None
    f1: Optional[float] = None

    # Timing
    pipeline_ms: float = 0.0