umairali64488 committed
Commit c8cd4b6 · verified · 1 Parent(s): 47a1a1c

Upload 6 files

Files changed (6)
  1. app/__init__.py +0 -0
  2. app/config.py +39 -0
  3. app/llm_chain.py +198 -0
  4. app/llm_factory.py +54 -0
  5. app/main.py +149 -0
  6. app/prompts.py +73 -0
app/__init__.py ADDED
File without changes
app/config.py ADDED
@@ -0,0 +1,39 @@
+ """
+ config.py — Application settings via pydantic-settings.
+ Set OPENROUTER_API_KEY in .env locally or in HF Space Secrets.
+ """
+ from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+ class Settings(BaseSettings):
+     model_config = SettingsConfigDict(env_file=".env", extra="ignore")
+
+     # ── OpenRouter ────────────────────────────────────────────────────────────
+     OPENROUTER_API_KEY: str = ""
+     BASE_URL: str = "https://openrouter.ai/api/v1"
+
+     # ── Panel LLMs (queried in parallel) ──────────────────────────────────────
+     PANEL_MODELS: list[str] = [
+         "arcee-ai/trinity-large-preview:free",
+         "stepfun/step-3.5-flash:free",
+         "nvidia/nemotron-3-nano-30b-a3b:free",
+     ]
+
+     PANEL_MODEL_LABELS: dict[str, str] = {
+         "arcee-ai/trinity-large-preview:free": "Trinity Large",
+         "stepfun/step-3.5-flash:free": "StepFun Flash",
+         "nvidia/nemotron-3-nano-30b-a3b:free": "Nemotron Nano",
+     }
+
+     # ── Judge LLM ─────────────────────────────────────────────────────────────
+     JUDGE_MODEL: str = "qwen/qwen3-30b-a3b:free"
+     JUDGE_LABEL: str = "Qwen3 Judge"
+
+     # ── Generation params ─────────────────────────────────────────────────────
+     PANEL_TEMPERATURE: float = 0.3
+     JUDGE_TEMPERATURE: float = 0.1
+     PANEL_MAX_TOKENS: int = 2048
+     JUDGE_MAX_TOKENS: int = 4096
+
+
+ settings = Settings()
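A quick way to confirm the settings load — a minimal sketch, not part of the commit; it assumes a `.env` file at the project root containing `OPENROUTER_API_KEY=<your key>`:

```python
# smoke_test_config.py — sketch only, assumes the app package is importable
from app.config import settings

print(settings.PANEL_MODELS)              # the three panel model ids
print(settings.JUDGE_MODEL)               # "qwen/qwen3-30b-a3b:free"
print(bool(settings.OPENROUTER_API_KEY))  # True once the key is set
```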
app/llm_chain.py ADDED
@@ -0,0 +1,198 @@
+ """
+ llm_chain.py — LangChain LCEL pipeline for multi-LLM code debugging.
+
+ Flow:
+     1. Build one LCEL chain per panel model (PANEL_PROMPT | llm | StrOutputParser)
+     2. Run all 3 chains concurrently with asyncio.gather (ainvoke)
+     3. Feed all results into the judge chain (JUDGE_PROMPT | llm | StrOutputParser)
+     4. Parse judge output into {reasoning, final_answer}
+     5. Return structured dict consumed by the FastAPI endpoint
+ """
+ import asyncio
+ import time
+ from typing import Any
+
+ from langchain_core.output_parsers import StrOutputParser
+
+ from app.config import settings
+ from app.llm_factory import make_panel_llm, make_judge_llm
+ from app.prompts import PANEL_PROMPT, JUDGE_PROMPT
+
+
+ # ── Helpers ───────────────────────────────────────────────────────────────────
+
+ def _split_judge_output(raw: str) -> tuple[str, str]:
+     """
+     Split judge output on the markdown headers we asked for.
+     Returns (reasoning, final_answer).
+     """
+     reasoning = ""
+     final_answer = raw.strip()
+
+     if "## ✅ Final Answer" in raw:
+         parts = raw.split("## ✅ Final Answer", 1)
+         final_answer = parts[1].strip()
+         reasoning = parts[0].replace("## 🔍 Reasoning", "").strip()
+     elif "## 🔍 Reasoning" in raw:
+         reasoning = raw.split("## 🔍 Reasoning", 1)[1].strip()
+
+     return reasoning, final_answer
+
+
+ def _error_panel(model: str, label: str, error: str, latency_ms: float) -> dict:
+     return {
+         "model": model,
+         "label": label,
+         "response": None,
+         "latency_ms": latency_ms,
+         "error": error,
+     }
+
+
+ # ── Panel chain ───────────────────────────────────────────────────────────────
+
+ def build_panel_chain(model: str, temperature: float):
+     """
+     LCEL chain for a single panel model:
+         PANEL_PROMPT | ChatOpenAI | StrOutputParser
+     Input:  {"question": str}
+     Output: str (model's raw markdown answer)
+     """
+     llm = make_panel_llm(model, temperature=temperature)
+     return PANEL_PROMPT | llm | StrOutputParser()
+
+
+ async def _run_panel(model: str, label: str, question: str, temperature: float) -> dict:
+     """
+     Invoke one panel chain and return a structured result dict.
+     Errors are caught so one failing model doesn't abort everything.
+     """
+     chain = build_panel_chain(model, temperature)
+     start = time.perf_counter()
+     try:
+         response = await chain.ainvoke({"question": question})
+         latency = round((time.perf_counter() - start) * 1000, 1)
+         return {
+             "model": model,
+             "label": label,
+             "response": response.strip(),
+             "latency_ms": latency,
+             "error": None,
+         }
+     except Exception as exc:
+         latency = round((time.perf_counter() - start) * 1000, 1)
+         return _error_panel(model, label, str(exc), latency)
+
+
+ # ── Judge chain ───────────────────────────────────────────────────────────────
+
+ def build_judge_chain():
+     """
+     LCEL chain for the Qwen3 judge:
+         JUDGE_PROMPT | ChatOpenAI | StrOutputParser
+     Input:  {"question", "label_1", "response_1", "label_2", "response_2",
+              "label_3", "response_3"}
+     Output: str (full judge markdown text)
+     """
+     llm = make_judge_llm()
+     return JUDGE_PROMPT | llm | StrOutputParser()
+
+
+ async def _run_judge(question: str, panel_results: list[dict]) -> dict:
+     """
+     Build judge inputs from panel results, invoke the judge chain,
+     and parse the structured output.
+     """
+     chain = build_judge_chain()
+     start = time.perf_counter()
+
+     # Pad missing responses so the prompt always has 3 slots
+     padded = list(panel_results)
+     while len(padded) < 3:
+         padded.append({"label": "N/A", "response": "[No response — model failed]"})
+
+     def _content(r: dict) -> str:
+         if r.get("response"):
+             return r["response"]
+         return f"[Error: {r.get('error', 'unknown')}]"
+
+     judge_input = {
+         "question": question,
+         "label_1": padded[0]["label"],
+         "response_1": _content(padded[0]),
+         "label_2": padded[1]["label"],
+         "response_2": _content(padded[1]),
+         "label_3": padded[2]["label"],
+         "response_3": _content(padded[2]),
+     }
+
+     try:
+         raw = await chain.ainvoke(judge_input)
+         latency = round((time.perf_counter() - start) * 1000, 1)
+         reasoning, final_answer = _split_judge_output(raw)
+         return {
+             "model": settings.JUDGE_MODEL,
+             "label": settings.JUDGE_LABEL,
+             "reasoning": reasoning,
+             "final_answer": final_answer,
+             "full_response": raw.strip(),
+             "latency_ms": latency,
+             "error": None,
+         }
+     except Exception as exc:
+         latency = round((time.perf_counter() - start) * 1000, 1)
+         return {
+             "model": settings.JUDGE_MODEL,
+             "label": settings.JUDGE_LABEL,
+             "reasoning": "",
+             "final_answer": None,
+             "full_response": None,
+             "latency_ms": latency,
+             "error": str(exc),
+         }
+
+
+ # ── Main entry point ──────────────────────────────────────────────────────────
+
+ async def debug_query(question: str, temperature: float = 0.3) -> dict[str, Any]:
+     """
+     Full multi-LLM pipeline:
+         Phase 1 — query 3 panel models concurrently (asyncio.gather)
+         Phase 2 — send all results to the Qwen3 judge
+
+     Returns:
+         {
+             "question": str,
+             "panel": list[dict],   # one dict per model
+             "judge": dict,         # reasoning + final_answer
+             "total_ms": float,
+         }
+     """
+     total_start = time.perf_counter()
+
+     # ── Phase 1: parallel panel calls ────────────────────────────────────────
+     panel_tasks = [
+         _run_panel(
+             model=model,
+             label=settings.PANEL_MODEL_LABELS.get(model, model.split("/")[-1]),
+             question=question,
+             temperature=temperature,
+         )
+         for model in settings.PANEL_MODELS
+     ]
+     panel_results: list[dict] = list(await asyncio.gather(*panel_tasks))
+
+     # ── Phase 2: judge ────────────────────────────────────────────────────────
+     judge_result = await _run_judge(question, panel_results)
+
+     total_ms = round((time.perf_counter() - total_start) * 1000, 1)
+
+     return {
+         "question": question,
+         "panel": panel_results,
+         "judge": judge_result,
+         "total_ms": total_ms,
+     }
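For reference, a minimal sketch of driving the pipeline from a standalone script — not part of the commit; it assumes the `app` package is importable and OPENROUTER_API_KEY is set:

```python
# run_once.py — usage sketch only
import asyncio

from app.llm_chain import debug_query


async def main() -> None:
    result = await debug_query("Why does `list.append` return None in Python?")
    for p in result["panel"]:
        # each panel dict carries either an error string or a latency
        print(p["label"], "→", p["error"] or f"{p['latency_ms']}ms")
    print(result["judge"]["final_answer"])


if __name__ == "__main__":
    asyncio.run(main())
```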
app/llm_factory.py ADDED
@@ -0,0 +1,54 @@
+ """
+ llm_factory.py — Creates LangChain ChatOpenAI instances pointing at OpenRouter.
+ All LLMs in this project are built through this single factory so config
+ stays in one place.
+ """
+ from langchain_openai import ChatOpenAI
+
+ from app.config import settings
+
+
+ def make_llm(
+     model: str,
+     temperature: float | None = None,
+     max_tokens: int | None = None,
+ ) -> ChatOpenAI:
+     """
+     Return a LangChain ChatOpenAI client configured for OpenRouter.
+
+     Args:
+         model:       OpenRouter model string, e.g. "arcee-ai/trinity-large-preview:free"
+         temperature: Override; defaults to settings.PANEL_TEMPERATURE
+         max_tokens:  Override; defaults to settings.PANEL_MAX_TOKENS
+     """
+     if not settings.OPENROUTER_API_KEY:
+         raise ValueError(
+             "OPENROUTER_API_KEY is not set. "
+             "Add it to your .env file or HF Space Secrets."
+         )
+
+     return ChatOpenAI(
+         model=model,
+         openai_api_key=settings.OPENROUTER_API_KEY,
+         openai_api_base=settings.BASE_URL,
+         temperature=temperature if temperature is not None else settings.PANEL_TEMPERATURE,
+         max_tokens=max_tokens if max_tokens is not None else settings.PANEL_MAX_TOKENS,
+         default_headers={
+             "HTTP-Referer": "https://codedebug.local",
+             "X-Title": "CodeDebug Multi-LLM Assistant",
+         },
+     )
+
+
+ def make_panel_llm(model: str, temperature: float | None = None) -> ChatOpenAI:
+     """Panel LLM with the panel token budget."""
+     return make_llm(model, temperature=temperature, max_tokens=settings.PANEL_MAX_TOKENS)
+
+
+ def make_judge_llm() -> ChatOpenAI:
+     """Judge LLM with a larger token budget and near-zero temperature."""
+     return make_llm(
+         settings.JUDGE_MODEL,
+         temperature=settings.JUDGE_TEMPERATURE,
+         max_tokens=settings.JUDGE_MAX_TOKENS,
+     )
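A minimal sketch of what the factory hands back — not part of the commit, and it assumes a valid API key plus one of the configured panel model ids:

```python
# factory_demo.py — usage sketch only
from app.llm_factory import make_panel_llm

llm = make_panel_llm("arcee-ai/trinity-large-preview:free", temperature=0.0)

# ChatOpenAI is a LangChain Runnable, so it can be piped into LCEL chains
# (PANEL_PROMPT | llm | StrOutputParser()) or invoked directly:
print(llm.invoke("Say hi in one word.").content)
```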
app/main.py ADDED
@@ -0,0 +1,149 @@
+ """
+ main.py — FastAPI app for the CodeDebug Multi-LLM Debugger.
+
+ Endpoints:
+     GET  /              → serves frontend/index.html
+     GET  /health        → health check
+     GET  /api/v1/models → list configured models
+     POST /api/v1/debug  → main pipeline (3 panel LLMs → Qwen3 judge)
+ """
+ from contextlib import asynccontextmanager
+ from pathlib import Path
+
+ from fastapi import FastAPI, HTTPException, Request
+ from fastapi.exceptions import RequestValidationError
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi.responses import FileResponse, JSONResponse
+ from pydantic import BaseModel, Field, field_validator
+
+ from app.config import settings
+ from app.llm_chain import debug_query
+
+
+ # ── Lifespan ──────────────────────────────────────────────────────────────────
+ @asynccontextmanager
+ async def lifespan(app: FastAPI):
+     print("⚡ Starting CodeDebug Multi-LLM Debugger...")
+     print(f"   Panel models : {settings.PANEL_MODELS}")
+     print(f"   Judge model  : {settings.JUDGE_MODEL}")
+     if not settings.OPENROUTER_API_KEY:
+         print("⚠️  WARNING: OPENROUTER_API_KEY is not set — requests will fail!")
+     else:
+         print("✅ API key loaded. Ready!")
+     yield
+     print("🛑 Shutting down")
+
+
+ # ── App ───────────────────────────────────────────────────────────────────────
+ app = FastAPI(
+     title="CodeDebug Multi-LLM Debugger",
+     version="2.0.0",
+     description="3 panel LLMs in parallel → Qwen3 judge synthesizes the final answer.",
+     lifespan=lifespan,
+ )
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+
+ # ── Validation error handler ──────────────────────────────────────────────────
+ @app.exception_handler(RequestValidationError)
+ async def validation_error_handler(request: Request, exc: RequestValidationError):
+     print(f"❌ Validation error: {exc.errors()}")
+     return JSONResponse(
+         status_code=422,
+         content={"error": str(exc.errors()), "detail": exc.errors()},
+     )
+
+
+ # ── Request schema ────────────────────────────────────────────────────────────
+ class DebugRequest(BaseModel):
+     question: str = Field(..., min_length=1, description="The debugging question or code snippet")
+     temperature: float = Field(default=0.3, ge=0.0, le=1.0, description="Sampling temperature for panel models")
+
+     @field_validator("temperature", mode="before")
+     @classmethod
+     def coerce_float(cls, v):
+         try:
+             return float(v)
+         except Exception:
+             return 0.3
+
+     @field_validator("question")
+     @classmethod
+     def strip_question(cls, v):
+         v = v.strip()
+         if not v:
+             raise ValueError("question must not be empty")
+         return v
+
+
+ # ── Frontend ──────────────────────────────────────────────────────────────────
+ FRONTEND = Path(__file__).parent.parent / "frontend"
+
+
+ @app.get("/", include_in_schema=False)
+ def serve_ui():
+     """Serve the single-page frontend."""
+     index = FRONTEND / "index.html"
+     if index.exists():
+         return FileResponse(str(index))
+     return JSONResponse(
+         status_code=200,
+         content={"message": "CodeDebug API running. See /docs for endpoints."},
+     )
+
+
+ # ── System endpoints ──────────────────────────────────────────────────────────
+ @app.get("/health", tags=["system"])
+ def health():
+     """Health check."""
+     return {
+         "status": "ok",
+         "version": "2.0.0",
+         "api_key_set": bool(settings.OPENROUTER_API_KEY),
+     }
+
+
+ @app.get("/api/v1/models", tags=["system"])
+ def list_models():
+     """List all configured models."""
+     return {
+         "panel_models": [
+             {"model": m, "label": settings.PANEL_MODEL_LABELS.get(m, m)}
+             for m in settings.PANEL_MODELS
+         ],
+         "judge_model": {
+             "model": settings.JUDGE_MODEL,
+             "label": settings.JUDGE_LABEL,
+         },
+     }
+
+
+ # ── Debug endpoint ────────────────────────────────────────────────────────────
+ @app.post("/api/v1/debug", tags=["debug"])
+ async def debug(body: DebugRequest):
+     """
+     Main endpoint.
+
+     Sends `question` to 3 panel LLMs in parallel (LangChain LCEL chains),
+     then forwards all responses to the Qwen3 judge, which returns:
+       - reasoning   : the judge's analysis of the three answers
+       - final_answer: the synthesized definitive answer
+     """
+     if not settings.OPENROUTER_API_KEY:
+         raise HTTPException(
+             status_code=503,
+             detail="OPENROUTER_API_KEY is not configured on the server.",
+         )
+
+     print(f"📨 '{body.question[:70]}...' | temp={body.temperature}")
+
+     result = await debug_query(body.question, body.temperature)
+
+     successful = sum(1 for p in result["panel"] if not p.get("error"))
+     judge_status = "ok" if not result["judge"].get("error") else "failed"
+     print(f"✅ {successful}/3 panel responses · judge {judge_status} · {result['total_ms']}ms")
+
+     return result
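For reference, a minimal client-side sketch of calling the endpoint — not part of the commit; it assumes the app is running locally (e.g. `uvicorn app.main:app --port 8000`) and that `requests` is installed:

```python
# client_demo.py — usage sketch only
import requests

resp = requests.post(
    "http://localhost:8000/api/v1/debug",
    json={"question": "Why does a mutable default argument misbehave in Python?"},
    timeout=120,  # three panel calls plus a judge call can be slow on free-tier models
)
resp.raise_for_status()
data = resp.json()

for panel in data["panel"]:
    print(panel["label"], "ok" if not panel["error"] else f"error: {panel['error']}")
print(data["judge"]["final_answer"])
```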
app/prompts.py ADDED
@@ -0,0 +1,73 @@
+ """
+ prompts.py — All LangChain ChatPromptTemplates in one place.
+ Import from here; never hardcode prompts elsewhere.
+ """
+ from langchain_core.prompts import ChatPromptTemplate
+
+
+ # ── Panel: Code Debugging Prompt ──────────────────────────────────────────────
+ PANEL_PROMPT = ChatPromptTemplate.from_messages([
+     (
+         "system",
+         """You are an expert software debugger and code assistant.
+
+ When analyzing code or debugging questions:
+ - Identify the exact bug, error, or issue
+ - Explain the ROOT CAUSE clearly
+ - Provide a CORRECTED solution with working code examples
+ - If multiple issues exist, list all of them
+ - Be concise, precise, and professional
+
+ Use markdown with fenced code blocks (```language) for all code.""",
+     ),
+     (
+         "human",
+         "{question}",
+     ),
+ ])
+
+
+ # ── Judge: Synthesis Prompt ────────────────────────────────────────────────────
+ JUDGE_PROMPT = ChatPromptTemplate.from_messages([
+     (
+         "system",
+         """You are a master software engineering judge evaluating responses from multiple AI models.
+
+ Your job:
+ 1. Critically analyze each model's response for correctness, completeness, and clarity
+ 2. Identify what each model got RIGHT and WRONG
+ 3. Synthesize the single best possible answer, combining the strongest elements
+
+ Always structure your response EXACTLY like this (keep the exact headers):
+
+ ## 🔍 Reasoning
+ [Your critical analysis: what each model got right/wrong, which answer was most accurate, gaps you noticed]
+
+ ## ✅ Final Answer
+ [The definitive synthesized answer with correct code examples in markdown fenced blocks]""",
+     ),
+     (
+         "human",
+         """USER'S DEBUGGING QUESTION:
+ {question}
+
+ ---
+
+ ### Response 1 — {label_1}
+ {response_1}
+
+ ---
+
+ ### Response 2 — {label_2}
+ {response_2}
+
+ ---
+
+ ### Response 3 — {label_3}
+ {response_3}
+
+ ---
+
+ Now analyze all three responses and provide your reasoning and the definitive final answer.""",
+     ),
+ ])
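A minimal sketch of how these templates render, using only `langchain_core` (no API key needed) — not part of the commit:

```python
# prompt_demo.py — usage sketch only
from app.prompts import PANEL_PROMPT

# format_messages substitutes the template variables and returns message objects
messages = PANEL_PROMPT.format_messages(question="Why is my loop off by one?")
for m in messages:
    print(m.type, "→", m.content[:60])
# system → You are an expert software debugger and code assistant...
# human  → Why is my loop off by one?
```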