"""
models.py — Data classes used across the pipeline.
"""
from dataclasses import dataclass, field
from typing import Dict, List, Optional, Set, Tuple
@dataclass
class Symbol:
    """One symbol (function or method) extracted from a source file.

    Attributes:
        id: Identifier such as "./path.py:ClassName.method" or
            "./path.py:func".
        file: Absolute path to the source file.
        name: Bare name, e.g. "ClassName.method" or "func".
        code: Source code text of the symbol.
        lineno: Start line number; defaults to 0.
    """

    id: str
    file: str
    name: str
    code: str
    lineno: int = 0
@dataclass
class RepositoryIndex:
    """Index of a whole repository: its symbols and their dependency graph.

    Attributes:
        symbols: Mapping of symbol id -> Symbol.
        graph: Mapping of symbol id -> list of dependency ids.
        broken_files: File paths recorded as broken (presumably files that
            could not be parsed -- confirm against the indexer).
    """

    symbols: Dict[str, Symbol] = field(default_factory=dict)
    graph: Dict[str, List[str]] = field(default_factory=dict)
    broken_files: List[str] = field(default_factory=list)

    @property
    def reverse_graph(self) -> Dict[str, Set[str]]:
        """Return the inverted graph: each callee id -> set of its callers.

        The mapping is rebuilt from ``graph`` on every access. Ids that are
        never listed as a dependency do not appear as keys.
        """
        callers_of: Dict[str, Set[str]] = {}
        for source, targets in self.graph.items():
            for target in targets:
                if target not in callers_of:
                    callers_of[target] = set()
                callers_of[target].add(source)
        return callers_of

    @property
    def total_edges(self) -> int:
        """Return the summed length of every dependency list in ``graph``."""
        return sum(map(len, self.graph.values()))
@dataclass
class DiffResult:
    """Outcome of comparing two states.

    Attributes:
        modified: Entries recorded as modified.
        added: Entries recorded as added.
        deleted: Entries recorded as deleted.
        broken_files: File paths recorded as broken.
    """

    modified: List[str] = field(default_factory=list)
    added: List[str] = field(default_factory=list)
    deleted: List[str] = field(default_factory=list)
    broken_files: List[str] = field(default_factory=list)

    @property
    def all_changed(self) -> List[str]:
        """Return a new list: ``modified`` followed by ``added``.

        ``deleted`` entries are not included.
        """
        return [*self.modified, *self.added]
@dataclass
class ImpactResult:
    """Outcome of impact analysis.

    Attributes:
        changed: Ids listed as changed.
        blast_radius: Ids listed in the blast radius.
        dependencies: Ids listed as dependencies.
        scores: Mapping of symbol id -> score.
    """

    changed: List[str] = field(default_factory=list)
    blast_radius: List[str] = field(default_factory=list)
    dependencies: List[str] = field(default_factory=list)
    scores: Dict[str, float] = field(default_factory=dict)

    @property
    def all_relevant(self) -> List[str]:
        """Return every relevant symbol id once, scored ids first.

        Scored ids come first, ordered by descending score; ties keep the
        insertion order of ``scores`` because the sort is stable. Ids found
        only in ``changed``, ``blast_radius`` or ``dependencies`` follow, in
        that order of first appearance.
        """
        by_score = sorted(self.scores, key=self.scores.get, reverse=True)
        unscored_pool = self.changed + self.blast_radius + self.dependencies
        # dict.fromkeys keeps the first occurrence of each id, in order.
        return list(dict.fromkeys(by_score + unscored_pool))
@dataclass
class ContextPackage:
    """Compiled context that is ultimately handed to an LLM.

    Attributes:
        text: The compiled context text.
        symbol_count: Symbol count for this package.
        token_estimate: Token estimate for this package.
        total_repo_tokens: Token total for the whole repository.
        dropped_symbols: Symbols that were scored but cut by the budget.
        skipped_files: Files skipped because of a SyntaxError.
        graph_confidence: Fraction of edges that resolved; defaults to 1.0.
    """

    text: str
    symbol_count: int
    token_estimate: int
    total_repo_tokens: int

    # Fields that let the LLM know what the context is missing.
    dropped_symbols: List[str] = field(default_factory=list)
    skipped_files: List[str] = field(default_factory=list)
    graph_confidence: float = 1.0

    @property
    def reduction_pct(self) -> float:
        """Return the token reduction relative to the full repo, in percent.

        Returns 0.0 when ``total_repo_tokens`` is 0, which avoids dividing
        by zero.
        """
        repo_tokens = self.total_repo_tokens
        if repo_tokens != 0:
            kept_fraction = self.token_estimate / repo_tokens
            return (1 - kept_fraction) * 100
        return 0.0
@dataclass
class BenchmarkResult:
    """Metrics recorded for one benchmark run.

    ``name``, ``repo_path`` and ``changed_functions`` are required; every
    other field has a default. The precision/recall/F1 fields default to
    None and are filled in only when ground truth is available.
    """

    name: str
    repo_path: str
    changed_functions: List[str]

    # --- graph statistics ---
    total_symbols: int = 0
    total_edges: int = 0
    graph_build_ms: float = 0.0

    # --- retrieval statistics ---
    retrieved_count: int = 0
    retrieved_ids: List[str] = field(default_factory=list)

    # --- token statistics ---
    total_tokens: int = 0
    context_tokens: int = 0
    token_reduction_pct: float = 0.0
    function_reduction_pct: float = 0.0

    # --- precision / recall (only when ground truth is available) ---
    precision: Optional[float] = None
    recall: Optional[float] = None
    f1: Optional[float] = None

    # --- timing ---
    pipeline_ms: float = 0.0