technophyle committed on
Commit
f1089a9
·
verified ·
1 Parent(s): 087c947

Sync from GitHub via hub-sync

Browse files
evals/run_eval.py CHANGED
@@ -125,14 +125,24 @@ def post_query(row):
125
  marker in detail_text
126
  for marker in [
127
  "ThrottlingException",
 
128
  "Too many requests",
129
  "timed out",
130
  "timeout",
131
  "ServiceUnavailable",
 
132
  ]
133
  )
134
  if is_retryable and attempt < QUERY_MAX_RETRIES:
135
- wait_seconds = QUERY_RETRY_BASE_SECONDS * (2 ** (attempt - 1))
 
 
 
 
 
 
 
 
136
  log(
137
  f"Retrying case {case_id} after transient query failure "
138
  f"(attempt {attempt}/{QUERY_MAX_RETRIES}, wait={wait_seconds:.1f}s): {detail_text}"
@@ -617,7 +627,7 @@ def build_resume_summary(custom_metrics, audit, ragas_report, ragas_error):
617
  return " ".join(lines)
618
 
619
 
620
- def benchmark_readiness(audit, ragas_error):
621
  reasons = []
622
  if audit["case_count"] < 25:
623
  reasons.append("small_sample")
@@ -629,10 +639,15 @@ def benchmark_readiness(audit, ragas_error):
629
  reasons.append("eval_set_warnings")
630
  if ragas_error not in {None, "disabled"}:
631
  reasons.append("ragas_instability")
 
 
632
 
633
  if reasons:
 
 
 
634
  return {
635
- "status": "internal_or_demo_benchmark",
636
  "reasons": reasons,
637
  }
638
  if audit.get("benchmark_scope", {}).get("type") == "single_repository":
@@ -774,7 +789,7 @@ def run_ragas(rows, outputs):
774
  try:
775
  timeout_seconds = int(os.getenv("EVAL_TIMEOUT_SECONDS", "180"))
776
  thread_timeout_seconds = float(os.getenv("EVAL_THREAD_TIMEOUT_SECONDS", str(max(timeout_seconds, 240))))
777
- max_workers = int(os.getenv("EVAL_MAX_WORKERS", "4"))
778
  run_config = RunConfig(
779
  timeout=timeout_seconds,
780
  thread_timeout=thread_timeout_seconds,
@@ -887,7 +902,7 @@ def run():
887
  headline_metrics = build_headline_metrics(custom_metrics, audit)
888
  metric_guidance = build_metric_guidance(custom_metrics, ragas_report)
889
  resume_summary = build_resume_summary(custom_metrics, audit, ragas_report, ragas_error)
890
- readiness = benchmark_readiness(audit, ragas_error)
891
 
892
  report = {
893
  "config": {
 
125
  marker in detail_text
126
  for marker in [
127
  "ThrottlingException",
128
+ "throttled",
129
  "Too many requests",
130
  "timed out",
131
  "timeout",
132
  "ServiceUnavailable",
133
+ "temporarily unavailable",
134
  ]
135
  )
136
  if is_retryable and attempt < QUERY_MAX_RETRIES:
137
+ retry_after = response.headers.get("Retry-After")
138
+ try:
139
+ wait_seconds = (
140
+ float(retry_after)
141
+ if retry_after
142
+ else QUERY_RETRY_BASE_SECONDS * (2 ** (attempt - 1))
143
+ )
144
+ except ValueError:
145
+ wait_seconds = QUERY_RETRY_BASE_SECONDS * (2 ** (attempt - 1))
146
  log(
147
  f"Retrying case {case_id} after transient query failure "
148
  f"(attempt {attempt}/{QUERY_MAX_RETRIES}, wait={wait_seconds:.1f}s): {detail_text}"
 
627
  return " ".join(lines)
628
 
629
 
630
+ def benchmark_readiness(audit, ragas_error, metric_guidance=None):
631
  reasons = []
632
  if audit["case_count"] < 25:
633
  reasons.append("small_sample")
 
639
  reasons.append("eval_set_warnings")
640
  if ragas_error not in {None, "disabled"}:
641
  reasons.append("ragas_instability")
642
+ if metric_guidance and metric_guidance.get("primary_gate") != "pass":
643
+ reasons.append("primary_gate_failed")
644
 
645
  if reasons:
646
+ status = "single_repo_benchmark_needs_work"
647
+ if audit.get("benchmark_scope", {}).get("type") != "single_repository":
648
+ status = "internal_or_demo_benchmark"
649
  return {
650
+ "status": status,
651
  "reasons": reasons,
652
  }
653
  if audit.get("benchmark_scope", {}).get("type") == "single_repository":
 
789
  try:
790
  timeout_seconds = int(os.getenv("EVAL_TIMEOUT_SECONDS", "180"))
791
  thread_timeout_seconds = float(os.getenv("EVAL_THREAD_TIMEOUT_SECONDS", str(max(timeout_seconds, 240))))
792
+ max_workers = int(os.getenv("EVAL_MAX_WORKERS", "2"))
793
  run_config = RunConfig(
794
  timeout=timeout_seconds,
795
  thread_timeout=thread_timeout_seconds,
 
902
  headline_metrics = build_headline_metrics(custom_metrics, audit)
903
  metric_guidance = build_metric_guidance(custom_metrics, ragas_report)
904
  resume_summary = build_resume_summary(custom_metrics, audit, ragas_report, ragas_error)
905
+ readiness = benchmark_readiness(audit, ragas_error, metric_guidance)
906
 
907
  report = {
908
  "config": {
evals/sample_eval_set.json CHANGED
@@ -6,6 +6,7 @@
6
  "ground_truth": "Documenso is an open-source document signing platform and DocuSign alternative. It lets users create, send, and sign documents electronically while emphasizing self-hosting, trust, and the ability to inspect how the signing system works under the hood.",
7
  "expected_sources": [
8
  "README.md",
 
9
  "ARCHITECTURE.md"
10
  ],
11
  "must_include_any": [
 
6
  "ground_truth": "Documenso is an open-source document signing platform and DocuSign alternative. It lets users create, send, and sign documents electronically while emphasizing self-hosting, trust, and the ability to inspect how the signing system works under the hood.",
7
  "expected_sources": [
8
  "README.md",
9
+ "MANIFEST.md",
10
  "ARCHITECTURE.md"
11
  ],
12
  "must_include_any": [
server_app.py CHANGED
@@ -7,6 +7,7 @@ from fastapi.middleware.cors import CORSMiddleware
7
  from pydantic import BaseModel, Field, HttpUrl
8
  from dotenv import load_dotenv
9
 
 
10
  from src.rag_system import CodebaseRAGSystem
11
 
12
  load_dotenv(Path(__file__).with_name(".env"))
@@ -122,7 +123,19 @@ async def query_repository(request: QueryRequest, session_id: str = Depends(requ
122
  )
123
  except ValueError as exc:
124
  raise HTTPException(status_code=400, detail=str(exc))
 
 
 
 
 
 
125
  except Exception as exc:
 
 
 
 
 
 
126
  raise HTTPException(status_code=500, detail=str(exc))
127
 
128
 
 
7
  from pydantic import BaseModel, Field, HttpUrl
8
  from dotenv import load_dotenv
9
 
10
+ from src.bedrock_claude import BedrockTransientError, is_bedrock_retryable_error
11
  from src.rag_system import CodebaseRAGSystem
12
 
13
  load_dotenv(Path(__file__).with_name(".env"))
 
123
  )
124
  except ValueError as exc:
125
  raise HTTPException(status_code=400, detail=str(exc))
126
+ except BedrockTransientError as exc:
127
+ raise HTTPException(
128
+ status_code=429,
129
+ detail=str(exc),
130
+ headers={"Retry-After": os.getenv("BEDROCK_HTTP_RETRY_AFTER_SECONDS", "10")},
131
+ )
132
  except Exception as exc:
133
+ if is_bedrock_retryable_error(exc):
134
+ raise HTTPException(
135
+ status_code=429,
136
+ detail=f"Bedrock throttled or was temporarily unavailable: {exc}",
137
+ headers={"Retry-After": os.getenv("BEDROCK_HTTP_RETRY_AFTER_SECONDS", "10")},
138
+ )
139
  raise HTTPException(status_code=500, detail=str(exc))
140
 
141
 
src/bedrock_claude.py CHANGED
@@ -1,16 +1,59 @@
1
  import os
 
 
2
  from typing import Optional, Tuple
3
 
4
 
 
 
 
 
5
  def create_bedrock_runtime_client():
6
  try:
7
  import boto3
8
  except ImportError as exc:
9
  raise RuntimeError("Bedrock Claude support requires the `boto3` package.") from exc
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  return boto3.client(
12
  "bedrock-runtime",
13
  region_name=os.getenv("AWS_REGION", os.getenv("AWS_DEFAULT_REGION", "us-east-1")),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  )
15
 
16
 
@@ -31,17 +74,45 @@ def generate_bedrock_claude_text(
31
  if top_p is not None:
32
  inference_config["topP"] = top_p
33
 
34
- response = client.converse(
35
- modelId=model,
36
- system=[{"text": system_prompt.strip()}],
37
- messages=[
38
- {
39
- "role": "user",
40
- "content": [{"text": user_prompt.strip()}],
41
- }
42
- ],
43
- inferenceConfig=inference_config,
44
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  content_blocks = (((response or {}).get("output") or {}).get("message") or {}).get("content") or []
47
  text = "".join(block.get("text", "") for block in content_blocks if block.get("text")).strip()
 
1
  import os
2
+ import random
3
+ import time
4
  from typing import Optional, Tuple
5
 
6
 
7
+ class BedrockTransientError(RuntimeError):
8
+ pass
9
+
10
+
11
  def create_bedrock_runtime_client():
12
  try:
13
  import boto3
14
  except ImportError as exc:
15
  raise RuntimeError("Bedrock Claude support requires the `boto3` package.") from exc
16
 
17
+ max_attempts = int(os.getenv("BEDROCK_CLIENT_MAX_ATTEMPTS", "10"))
18
+ try:
19
+ from botocore.config import Config
20
+
21
+ config = Config(
22
+ retries={
23
+ "max_attempts": max_attempts,
24
+ "mode": os.getenv("BEDROCK_RETRY_MODE", "adaptive"),
25
+ }
26
+ )
27
+ except Exception:
28
+ config = None
29
+
30
  return boto3.client(
31
  "bedrock-runtime",
32
  region_name=os.getenv("AWS_REGION", os.getenv("AWS_DEFAULT_REGION", "us-east-1")),
33
+ config=config,
34
+ )
35
+
36
+
37
+ def is_bedrock_retryable_error(exc: Exception) -> bool:
38
+ code = ""
39
+ response = getattr(exc, "response", None)
40
+ if isinstance(response, dict):
41
+ code = str((response.get("Error") or {}).get("Code") or "")
42
+
43
+ message = str(exc)
44
+ normalized = f"{code} {message}".lower()
45
+ return any(
46
+ marker in normalized
47
+ for marker in {
48
+ "throttling",
49
+ "too many requests",
50
+ "rate exceeded",
51
+ "serviceunavailable",
52
+ "service unavailable",
53
+ "timeout",
54
+ "timed out",
55
+ "temporarily unavailable",
56
+ }
57
  )
58
 
59
 
 
74
  if top_p is not None:
75
  inference_config["topP"] = top_p
76
 
77
+ max_retries = int(os.getenv("BEDROCK_LLM_MAX_RETRIES", "6"))
78
+ base_seconds = float(os.getenv("BEDROCK_LLM_RETRY_BASE_SECONDS", "1.5"))
79
+ max_sleep_seconds = float(os.getenv("BEDROCK_LLM_RETRY_MAX_SECONDS", "30"))
80
+ last_exc = None
81
+
82
+ for attempt in range(1, max_retries + 1):
83
+ try:
84
+ response = client.converse(
85
+ modelId=model,
86
+ system=[{"text": system_prompt.strip()}],
87
+ messages=[
88
+ {
89
+ "role": "user",
90
+ "content": [{"text": user_prompt.strip()}],
91
+ }
92
+ ],
93
+ inferenceConfig=inference_config,
94
+ )
95
+ break
96
+ except Exception as exc:
97
+ if not is_bedrock_retryable_error(exc) or attempt >= max_retries:
98
+ if is_bedrock_retryable_error(exc):
99
+ raise BedrockTransientError(
100
+ f"Bedrock throttled or was temporarily unavailable after "
101
+ f"{attempt} attempts: {exc}"
102
+ ) from exc
103
+ raise
104
+
105
+ last_exc = exc
106
+ sleep_seconds = min(max_sleep_seconds, base_seconds * (2 ** (attempt - 1)))
107
+ sleep_seconds += random.uniform(0.0, min(1.0, sleep_seconds * 0.2))
108
+ print(
109
+ f"[bedrock] Transient Bedrock error; retrying "
110
+ f"attempt={attempt}/{max_retries} wait={sleep_seconds:.1f}s error={exc}",
111
+ flush=True,
112
+ )
113
+ time.sleep(sleep_seconds)
114
+ else:
115
+ raise BedrockTransientError(f"Bedrock request failed after retries: {last_exc}")
116
 
117
  content_blocks = (((response or {}).get("output") or {}).get("message") or {}).get("content") or []
118
  text = "".join(block.get("text", "") for block in content_blocks if block.get("text")).strip()
src/rag_system.py CHANGED
@@ -361,7 +361,14 @@ class CodebaseRAGSystem:
361
  "setup",
362
  "tests",
363
  }
364
- search_depth = top_k * 4 if question_intent in deep_search_intents else top_k * 2
 
 
 
 
 
 
 
365
  retrieval_query = self._build_retrieval_query(question, normalized_history)
366
  query_embedding = self.embedder.embed_text(retrieval_query)
367
  semantic_hits = []
@@ -377,6 +384,13 @@ class CodebaseRAGSystem:
377
  )
378
  semantic_hits = self.hybrid_search.normalize_semantic_results(semantic_hits)
379
  fused = self.hybrid_search.reciprocal_rank_fusion(lexical_hits, semantic_hits, top_k=search_depth)
 
 
 
 
 
 
 
380
  rerank_query = retrieval_query if question_intent in deep_search_intents else question
381
  reranked = self.hybrid_search.rerank(rerank_query, fused, top_k=search_depth)
382
  reranked = self._prioritize_results(question, retrieval_query, reranked, top_k=top_k)
@@ -846,6 +860,107 @@ Do not leave the answer unfinished.
846
  parts.append(f"Previous answer: {recent_assistant[0][:300]}")
847
  return "\n".join(parts)
848
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
849
  def _prioritize_results(
850
  self,
851
  question: str,
@@ -858,16 +973,20 @@ Do not leave the answer unfinished.
858
  token in combined_query
859
  for token in {"code", "snippet", "implementation", "function", "class", "import"}
860
  )
861
- wants_docs = self._is_documentation_query(combined_query)
 
 
 
 
862
  wants_repo_overview = self._is_repo_overview_question(question) or self._is_repo_overview_question(
863
  retrieval_query
864
  )
865
- question_intent = self._question_intent(question)
866
 
867
  def sort_key(item: dict):
868
  is_doc = self._is_doc_source(item)
869
  return (
870
- self._canonical_path_priority(item, question),
 
871
  self._doc_priority(item),
872
  1 if wants_repo_overview and is_doc else 0,
873
  1 if (wants_docs and is_doc) or (not wants_docs and not is_doc) else 0,
@@ -953,19 +1072,81 @@ Do not leave the answer unfinished.
953
  normalized = " ".join((question or "").lower().split())
954
  if not normalized:
955
  return "general"
 
 
 
 
 
 
956
  if CodebaseRAGSystem._is_repo_overview_question(normalized):
957
  return "overview"
958
- if any(token in normalized for token in {"test", "tests", "pytest", "spec"}):
959
  return "tests"
960
- if any(token in normalized for token in {"error", "invalid", "conflict", "raises", "guard against"}):
961
  return "error_handling"
962
- if any(token in normalized for token in {"how are", "how does", "flow", "across files", "code path"}):
963
- return "cross_file"
964
- if any(token in normalized for token in {"export", "expose", "import", "public api"}):
 
 
 
 
 
 
 
 
 
 
 
 
965
  return "api"
966
- if any(token in normalized for token in {"create", "setup", "install", "configuration", "metadata", "table"}):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
967
  return "setup"
968
- if any(token in normalized for token in {"function", "method", "class", "implementation", "does ", "what is special"}):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
969
  return "implementation"
970
  if CodebaseRAGSystem._is_documentation_query(normalized):
971
  return "docs"
@@ -997,6 +1178,7 @@ Do not leave the answer unfinished.
997
  hints.extend(["create", "setup", "configure", "initialize", "schema", "README.md", "docs"])
998
  if "__init__" in lowered or "exports" in lowered:
999
  hints.extend(["__init__.py", "package exports", "public api"])
 
1000
 
1001
  for term in sorted(code_terms):
1002
  parts = [part for part in re.split(r"[._/-]+", term) if len(part) > 2]
@@ -1049,6 +1231,200 @@ Do not leave the answer unfinished.
1049
  return 1
1050
  return 0
1051
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1052
  def _canonical_path_priority(self, item: dict, question: str) -> int:
1053
  file_path = (item.get("file_path") or "").lower()
1054
  source_text = " ".join(
@@ -1065,12 +1441,20 @@ Do not leave the answer unfinished.
1065
  intent = self._question_intent(question)
1066
  code_terms = self._query_code_terms(question)
1067
  path_fragments = self._query_path_fragments(question)
 
1068
  score = 0
1069
 
1070
  for fragment in path_fragments:
1071
  if file_path == fragment or file_path.endswith(f"/{fragment}") or fragment in file_path:
1072
  score += 8
1073
 
 
 
 
 
 
 
 
1074
  matched_terms = {term for term in code_terms if term in source_text}
1075
  score += min(len(matched_terms), 6)
1076
 
@@ -1136,7 +1520,9 @@ Do not leave the answer unfinished.
1136
  "error_handling",
1137
  "tests",
1138
  }:
1139
- score -= 1
 
 
1140
 
1141
  return score
1142
 
 
361
  "setup",
362
  "tests",
363
  }
364
+ deep_multiplier = int(os.getenv("RAG_DEEP_SEARCH_MULTIPLIER", "8"))
365
+ shallow_multiplier = int(os.getenv("RAG_SEARCH_MULTIPLIER", "4"))
366
+ search_depth = (
367
+ top_k * deep_multiplier
368
+ if question_intent in deep_search_intents
369
+ else top_k * shallow_multiplier
370
+ )
371
+ search_depth = max(top_k, min(search_depth, 120))
372
  retrieval_query = self._build_retrieval_query(question, normalized_history)
373
  query_embedding = self.embedder.embed_text(retrieval_query)
374
  semantic_hits = []
 
384
  )
385
  semantic_hits = self.hybrid_search.normalize_semantic_results(semantic_hits)
386
  fused = self.hybrid_search.reciprocal_rank_fusion(lexical_hits, semantic_hits, top_k=search_depth)
387
+ path_hits = self._path_intent_search(
388
+ self.repo_chunks[repo_id],
389
+ question,
390
+ retrieval_query,
391
+ top_k=search_depth,
392
+ )
393
+ fused = self._merge_ranked_candidates(fused, path_hits, top_k=search_depth)
394
  rerank_query = retrieval_query if question_intent in deep_search_intents else question
395
  reranked = self.hybrid_search.rerank(rerank_query, fused, top_k=search_depth)
396
  reranked = self._prioritize_results(question, retrieval_query, reranked, top_k=top_k)
 
860
  parts.append(f"Previous answer: {recent_assistant[0][:300]}")
861
  return "\n".join(parts)
862
 
863
+ def _merge_ranked_candidates(
864
+ self,
865
+ ranked_results: List[dict],
866
+ path_results: List[dict],
867
+ top_k: int,
868
+ ) -> List[dict]:
869
+ merged = {}
870
+
871
+ for rank, item in enumerate(ranked_results, start=1):
872
+ enriched = dict(item)
873
+ enriched.setdefault("rrf_score", 0.0)
874
+ enriched["candidate_rank"] = rank
875
+ merged[enriched["id"]] = enriched
876
+
877
+ for rank, item in enumerate(path_results, start=1):
878
+ existing = merged.get(item["id"])
879
+ path_bonus = 1.0 / (20 + rank)
880
+ if existing is None:
881
+ enriched = dict(item)
882
+ enriched["rrf_score"] = float(enriched.get("rrf_score", 0.0)) + path_bonus
883
+ enriched["path_rank"] = rank
884
+ merged[enriched["id"]] = enriched
885
+ continue
886
+
887
+ existing.update({key: value for key, value in item.items() if key not in existing})
888
+ existing["rrf_score"] = float(existing.get("rrf_score", 0.0)) + path_bonus
889
+ existing["path_rank"] = rank
890
+
891
+ return sorted(
892
+ merged.values(),
893
+ key=lambda item: (
894
+ float(item.get("rrf_score", 0.0)),
895
+ float(item.get("path_score", 0.0)),
896
+ float(item.get("semantic_score", 0.0)),
897
+ ),
898
+ reverse=True,
899
+ )[:top_k]
900
+
901
+ def _path_intent_search(
902
+ self,
903
+ chunks: List[dict],
904
+ question: str,
905
+ retrieval_query: str,
906
+ top_k: int,
907
+ ) -> List[dict]:
908
+ if not chunks:
909
+ return []
910
+
911
+ combined_query = f"{question}\n{retrieval_query}"
912
+ path_hints = self._domain_path_hints(combined_query)
913
+ code_terms = self._query_code_terms(combined_query)
914
+ if not path_hints and not code_terms:
915
+ return []
916
+
917
+ scored = []
918
+ path_fragments = self._query_path_fragments(combined_query)
919
+ for item in chunks:
920
+ score = 0
921
+ file_path = (item.get("file_path") or "").lower()
922
+ text = " ".join(
923
+ [
924
+ file_path,
925
+ str(item.get("symbol_name") or "").lower(),
926
+ str(item.get("signature") or "").lower(),
927
+ str(item.get("content") or "")[:500].lower(),
928
+ ]
929
+ )
930
+
931
+ for fragment in path_fragments:
932
+ if file_path == fragment or file_path.endswith(f"/{fragment}") or fragment in file_path:
933
+ score += 12
934
+
935
+ for hint in path_hints:
936
+ normalized_hint = hint.rstrip("/").lower()
937
+ if file_path == normalized_hint or file_path.startswith(normalized_hint + "/"):
938
+ score += 10
939
+ elif normalized_hint in file_path:
940
+ score += 6
941
+
942
+ if code_terms:
943
+ score += min(sum(1 for term in code_terms if term in text), 8)
944
+
945
+ if score <= 0:
946
+ continue
947
+
948
+ score += max(self._canonical_path_priority(item, combined_query), 0)
949
+
950
+ enriched = dict(item)
951
+ enriched["path_score"] = float(score)
952
+ scored.append(enriched)
953
+
954
+ scored.sort(
955
+ key=lambda item: (
956
+ float(item.get("path_score", 0.0)),
957
+ float(item.get("bm25_score", 0.0)),
958
+ float(item.get("semantic_score", 0.0)),
959
+ ),
960
+ reverse=True,
961
+ )
962
+ return scored[:top_k]
963
+
964
  def _prioritize_results(
965
  self,
966
  question: str,
 
973
  token in combined_query
974
  for token in {"code", "snippet", "implementation", "function", "class", "import"}
975
  )
976
+ question_intent = self._question_intent(question)
977
+ wants_docs = self._is_documentation_query(combined_query) and question_intent in {
978
+ "docs",
979
+ "overview",
980
+ }
981
  wants_repo_overview = self._is_repo_overview_question(question) or self._is_repo_overview_question(
982
  retrieval_query
983
  )
 
984
 
985
  def sort_key(item: dict):
986
  is_doc = self._is_doc_source(item)
987
  return (
988
+ self._canonical_path_priority(item, combined_query),
989
+ float(item.get("path_score", 0.0)),
990
  self._doc_priority(item),
991
  1 if wants_repo_overview and is_doc else 0,
992
  1 if (wants_docs and is_doc) or (not wants_docs and not is_doc) else 0,
 
1072
  normalized = " ".join((question or "").lower().split())
1073
  if not normalized:
1074
  return "general"
1075
+ def has_any(terms: set[str]) -> bool:
1076
+ return any(
1077
+ re.search(rf"(?<![a-z0-9]){re.escape(term)}(?![a-z0-9])", normalized)
1078
+ for term in terms
1079
+ )
1080
+
1081
  if CodebaseRAGSystem._is_repo_overview_question(normalized):
1082
  return "overview"
1083
+ if has_any({"test", "tests", "pytest", "spec"}):
1084
  return "tests"
1085
+ if has_any({"error", "invalid", "conflict", "raises", "guard against"}):
1086
  return "error_handling"
1087
+ if has_any(
1088
+ {
1089
+ "api",
1090
+ "api v1",
1091
+ "api v2",
1092
+ "endpoint",
1093
+ "frontend",
1094
+ "backend",
1095
+ "openapi",
1096
+ "public api",
1097
+ "route",
1098
+ "router",
1099
+ "trpc",
1100
+ }
1101
+ ):
1102
  return "api"
1103
+ if has_any(
1104
+ {
1105
+ "build",
1106
+ "configure",
1107
+ "configured",
1108
+ "configuration",
1109
+ "create",
1110
+ "database",
1111
+ "env",
1112
+ "environment",
1113
+ "install",
1114
+ "local development",
1115
+ "metadata",
1116
+ "orchestration",
1117
+ "self-host",
1118
+ "self hosting",
1119
+ "setup",
1120
+ "table",
1121
+ "workspace",
1122
+ }
1123
+ ):
1124
  return "setup"
1125
+ if has_any({"flow", "across", "across files", "connect", "code path"}):
1126
+ return "cross_file"
1127
+ if has_any(
1128
+ {
1129
+ "behavior",
1130
+ "class",
1131
+ "function",
1132
+ "implementation",
1133
+ "implemented",
1134
+ "job",
1135
+ "jobs",
1136
+ "lifecycle",
1137
+ "lives",
1138
+ "method",
1139
+ "represented",
1140
+ "signing",
1141
+ "webhook",
1142
+ "webhooks",
1143
+ "what is special",
1144
+ "where does",
1145
+ "where is",
1146
+ "where should",
1147
+ "where would",
1148
+ }
1149
+ ):
1150
  return "implementation"
1151
  if CodebaseRAGSystem._is_documentation_query(normalized):
1152
  return "docs"
 
1178
  hints.extend(["create", "setup", "configure", "initialize", "schema", "README.md", "docs"])
1179
  if "__init__" in lowered or "exports" in lowered:
1180
  hints.extend(["__init__.py", "package exports", "public api"])
1181
+ hints.extend(self._domain_path_hints(normalized))
1182
 
1183
  for term in sorted(code_terms):
1184
  parts = [part for part in re.split(r"[._/-]+", term) if len(part) > 2]
 
1231
  return 1
1232
  return 0
1233
 
1234
+ @staticmethod
1235
+ def _domain_path_hints(query: str) -> List[str]:
1236
+ normalized = " ".join((query or "").lower().split())
1237
+ hints = []
1238
+
1239
+ def has_any(terms: set[str]) -> bool:
1240
+ matched = False
1241
+ for term in terms:
1242
+ if term.startswith("/"):
1243
+ matched = matched or term in normalized
1244
+ continue
1245
+ matched = matched or bool(
1246
+ re.search(rf"(?<![a-z0-9]){re.escape(term)}(?![a-z0-9])", normalized)
1247
+ )
1248
+ return matched
1249
+
1250
+ rules = [
1251
+ (
1252
+ {"email", "emails", "mailer", "notification", "template"},
1253
+ [
1254
+ "packages/email",
1255
+ "packages/lib/server-only/email",
1256
+ "packages/lib/jobs/definitions/emails",
1257
+ ],
1258
+ ),
1259
+ (
1260
+ {"signing", "certificate", "gcloud", "hsm", "p12", "pdf signing"},
1261
+ [
1262
+ "packages/signing",
1263
+ "packages/signing/transports",
1264
+ "packages/signing/helpers",
1265
+ "packages/lib/jobs/definitions/internal/seal-document",
1266
+ ],
1267
+ ),
1268
+ (
1269
+ {"job", "jobs", "background", "inngest", "bullmq", "queue"},
1270
+ [
1271
+ "packages/lib/jobs",
1272
+ "packages/lib/jobs/client",
1273
+ "packages/lib/jobs/definitions",
1274
+ "apps/remix/server/router.ts",
1275
+ ],
1276
+ ),
1277
+ (
1278
+ {"webhook", "webhooks", "ssrf"},
1279
+ [
1280
+ "packages/lib/server-only/webhooks",
1281
+ "packages/lib/jobs/definitions/internal/execute-webhook",
1282
+ ],
1283
+ ),
1284
+ (
1285
+ {"recipient", "recipients"},
1286
+ [
1287
+ "packages/lib/server-only/recipient",
1288
+ "packages/trpc/server/recipient-router",
1289
+ ],
1290
+ ),
1291
+ (
1292
+ {"field", "fields"},
1293
+ [
1294
+ "packages/lib/server-only/field",
1295
+ "packages/trpc/server/field-router",
1296
+ "packages/lib/universal/field-renderer",
1297
+ ],
1298
+ ),
1299
+ (
1300
+ {"template", "templates"},
1301
+ [
1302
+ "packages/lib/server-only/template",
1303
+ "packages/trpc/server/template-router",
1304
+ ],
1305
+ ),
1306
+ (
1307
+ {"envelope", "envelopes"},
1308
+ [
1309
+ "packages/lib/server-only/envelope",
1310
+ "packages/lib/server-only/envelope-item",
1311
+ "packages/trpc/server/envelope-router",
1312
+ ],
1313
+ ),
1314
+ (
1315
+ {"document", "documents"},
1316
+ [
1317
+ "packages/lib/server-only/document",
1318
+ "packages/lib/server-only/document-data",
1319
+ "packages/trpc/server/document-router",
1320
+ ],
1321
+ ),
1322
+ (
1323
+ {"pdf", "storage", "upload", "s3"},
1324
+ [
1325
+ "packages/lib/server-only/pdf",
1326
+ "packages/lib/server-only/document-data",
1327
+ "packages/lib/universal/upload",
1328
+ "apps/remix/server/api/files",
1329
+ ],
1330
+ ),
1331
+ (
1332
+ {"api v1", "/api/v1", "ts-rest", "ts rest"},
1333
+ [
1334
+ "packages/api",
1335
+ "packages/api/v1",
1336
+ "packages/api/hono.ts",
1337
+ ],
1338
+ ),
1339
+ (
1340
+ {"api v2", "/api/v2", "openapi", "trpc-to-openapi"},
1341
+ [
1342
+ "packages/trpc/server",
1343
+ "packages/trpc/server/open-api.ts",
1344
+ "apps/remix/server/router.ts",
1345
+ "apps/remix/server/trpc",
1346
+ ],
1347
+ ),
1348
+ (
1349
+ {"trpc", "frontend", "backend", "/api/trpc", "internal api"},
1350
+ [
1351
+ "packages/trpc",
1352
+ "packages/trpc/react",
1353
+ "packages/trpc/client",
1354
+ "packages/trpc/server/context.ts",
1355
+ "apps/remix/server/trpc",
1356
+ "apps/remix/server/router.ts",
1357
+ ],
1358
+ ),
1359
+ (
1360
+ {"auth", "authentication", "session", "api token", "authorization", "bearer"},
1361
+ [
1362
+ "packages/auth",
1363
+ "packages/lib/server-only/auth",
1364
+ "packages/lib/server-only/public-api",
1365
+ "packages/trpc/server/context.ts",
1366
+ "packages/trpc/server/trpc.ts",
1367
+ "packages/api/v1/middleware/authenticated.ts",
1368
+ "apps/remix/server/context.ts",
1369
+ ],
1370
+ ),
1371
+ (
1372
+ {"database", "postgres", "postgresql", "prisma", "kysely", "migration"},
1373
+ [
1374
+ "packages/prisma",
1375
+ "packages/prisma/schema.prisma",
1376
+ "packages/prisma/migrations",
1377
+ ".env.example",
1378
+ ],
1379
+ ),
1380
+ (
1381
+ {"remix", "hono", "react router", "route", "routes", "user interface"},
1382
+ [
1383
+ "apps/remix/server",
1384
+ "apps/remix/app/routes",
1385
+ "apps/remix/app/root.tsx",
1386
+ "apps/remix/app/routes.ts",
1387
+ ],
1388
+ ),
1389
+ (
1390
+ {"test", "tests", "e2e", "playwright", "spec", "vitest"},
1391
+ [
1392
+ "packages/app-tests",
1393
+ "packages/lib/vitest.config.ts",
1394
+ "packages/lib/package.json",
1395
+ ],
1396
+ ),
1397
+ (
1398
+ {
1399
+ "config",
1400
+ "configuration",
1401
+ "env",
1402
+ "environment",
1403
+ "local development",
1404
+ "self-host",
1405
+ "self hosting",
1406
+ "workspace",
1407
+ "workspaces",
1408
+ "turborepo",
1409
+ "turbo",
1410
+ },
1411
+ [
1412
+ ".env.example",
1413
+ "README.md",
1414
+ "package.json",
1415
+ "turbo.json",
1416
+ "apps/docs/content/docs/developers/local-development",
1417
+ "apps/docs/content/docs/self-hosting/configuration",
1418
+ ],
1419
+ ),
1420
+ ]
1421
+
1422
+ for terms, paths in rules:
1423
+ if has_any(terms):
1424
+ hints.extend(paths)
1425
+
1426
+ return list(dict.fromkeys(hints))
1427
+
1428
  def _canonical_path_priority(self, item: dict, question: str) -> int:
1429
  file_path = (item.get("file_path") or "").lower()
1430
  source_text = " ".join(
 
1441
  intent = self._question_intent(question)
1442
  code_terms = self._query_code_terms(question)
1443
  path_fragments = self._query_path_fragments(question)
1444
+ path_hints = self._domain_path_hints(question)
1445
  score = 0
1446
 
1447
  for fragment in path_fragments:
1448
  if file_path == fragment or file_path.endswith(f"/{fragment}") or fragment in file_path:
1449
  score += 8
1450
 
1451
+ for path_hint in path_hints:
1452
+ normalized_hint = path_hint.rstrip("/").lower()
1453
+ if file_path == normalized_hint or file_path.startswith(normalized_hint + "/"):
1454
+ score += 8
1455
+ elif normalized_hint in file_path:
1456
+ score += 4
1457
+
1458
  matched_terms = {term for term in code_terms if term in source_text}
1459
  score += min(len(matched_terms), 6)
1460
 
 
1520
  "error_handling",
1521
  "tests",
1522
  }:
1523
+ score -= 3
1524
+ if file_path.startswith(".agents/") or file_path.startswith(".opencode/"):
1525
+ score -= 8
1526
 
1527
  return score
1528
 
src/repo_fetcher.py CHANGED
@@ -45,13 +45,23 @@ IGNORED_FILENAMES = {
45
  }
46
 
47
  IGNORED_DIRS = {
 
 
48
  ".git",
 
49
  ".next",
 
 
 
 
50
  ".turbo",
 
51
  "dist",
52
  "build",
53
  "coverage",
 
54
  "node_modules",
 
55
  "vendor",
56
  ".venv",
57
  "venv",
 
45
  }
46
 
47
  IGNORED_DIRS = {
48
+ ".agents",
49
+ ".cache",
50
  ".git",
51
+ ".mypy_cache",
52
  ".next",
53
+ ".opencode",
54
+ ".parcel-cache",
55
+ ".pytest_cache",
56
+ ".ruff_cache",
57
  ".turbo",
58
+ ".vite",
59
  "dist",
60
  "build",
61
  "coverage",
62
+ "logs",
63
  "node_modules",
64
+ "tmp",
65
  "vendor",
66
  ".venv",
67
  "venv",