CodeFlow / app.py
Rishi-Jain-27's picture
Updated README, added tracing capabilities, changed size of flowchart, vendored the animation assets so this meets off the grid.
1433b16
import json  # agent-trace logging
import logging  # report trace-write failures without breaking generation
import mimetypes
import re  # remove thinking tag from response
import time  # agent-trace logging
import uuid  # agent-trace logging
from datetime import datetime, timezone
from pathlib import Path  # load the custom frontend from disk
from textwrap import dedent
from typing import Any, cast  # to resolve PyLance freaking out over llama-cpp-python in the generate_flowchart function

import gradio as gr
from fastapi.responses import HTMLResponse, PlainTextResponse, FileResponse  # frontend + traces + vendored static assets
from gradio import Server
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# ----- Get Model ----- #
# Download the UD-Q3_K_XL GGUF file from the repo.
# NOTE: this runs at import time, so the module cannot be imported without
# fetching (or finding a cached copy of) the model file.
model_path = hf_hub_download(
    repo_id="unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF",
    filename="Qwen3-Coder-30B-A3B-Instruct-UD-Q3_K_XL.gguf" # fallback: Q2_K_XL
)
# Initialize llama.cpp with the local cached path.
# n_ctx must hold the system prompt + the numbered user code + the completion
# (generate_flowchart requests up to 1024 completion tokens).
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=2
)
# ----- Init App ----- #
# Single server object; every route and API endpoint below is registered on it.
app = gr.Server(title="Code-to-Flowchart Generator")
# ----- Agent traces ----- #
# Each generation appends one JSON line capturing the full LLM call (input code,
# the model's reasoning, output Mermaid + linemap, token usage, latency).
# Download the whole log from the running app at /traces .
MODEL_NAME = "unsloth/Qwen3-Coder-30B-A3B-Instruct-GGUF:UD-Q3_K_XL"
TRACE_PATH = Path(__file__).parent / "agent_traces.jsonl"


def write_trace(record: dict) -> None:
    """Append ``record`` to TRACE_PATH as a single JSON line (best effort).

    Args:
        record: JSON-serialisable mapping describing one generation.

    Returns:
        None. Failures are logged as warnings and never propagated, because
        tracing must never break generation.
    """
    try:
        # Serialise first so an unserialisable record never touches the file.
        line = json.dumps(record, ensure_ascii=False) + "\n"
        with open(TRACE_PATH, "a", encoding="utf-8") as f:
            f.write(line)
    except Exception:
        # Deliberately broad: this is a best-effort boundary. Log instead of
        # passing silently so a full disk or bad record is still discoverable.
        logging.getLogger(__name__).warning(
            "could not write agent trace to %s", TRACE_PATH, exc_info=True
        )
# ----- Functions ----- #
# This is a cleaning function to resolve common syntax errors.
def quote_labels(text: str) -> str:
    """Quote-wrap Mermaid node labels and escape code characters inside them.

    Rewrites ``Id[label]`` and ``Id{label}`` (the opening bracket must directly
    follow a word character) to ``Id["label"]`` / ``Id{"label"}``. Inside the
    label, double quotes become single quotes and square/curly brackets become
    numeric character references.

    A label that is already wrapped in one pair of double quotes is unwrapped
    first, so ``A["Start"]`` stays ``A["Start"]`` instead of turning into
    ``A["'Start'"]``; this also makes the function safe to apply twice.

    Args:
        text: Mermaid source, processed one line at a time.

    Returns:
        The rewritten Mermaid source with the same number of lines.
    """
    # A label's real closing bracket is followed by a Mermaid connector,
    # edge-label, pipe, statement end, or EOL. A subscript followed by a spaced
    # comparison/arithmetic operator (== < <= > >= != %) does not match, so it
    # is not taken for the close.
    end = r'(?=\s*(?:[-<][-.>xo]|==[>=xo]|\||;|$))'
    square = re.compile(r'(?<=\w)\[(.*?)\]' + end)
    curly = re.compile(r'(?<=\w)\{(.*?)\}' + end)

    def esc(body: str) -> str:
        # Drop one enclosing quote pair the model may already have supplied;
        # only when there is no other quote inside, so '"a" or "b"' is kept.
        if len(body) >= 2 and body[0] == body[-1] == '"' and '"' not in body[1:-1]:
            body = body[1:-1]
        return (body.replace('"', "'")
                    .replace('[', '&#91;').replace(']', '&#93;')
                    .replace('{', '&#123;').replace('}', '&#125;'))

    out = []
    for line in text.split('\n'):
        line = square.sub(lambda m: '["' + esc(m.group(1)) + '"]', line)
        line = curly.sub(lambda m: '{"' + esc(m.group(1)) + '"}', line)
        out.append(line)
    return '\n'.join(out)
# Parse the model's <linemap> block into {nodeId: [startLine, endLine]}.
# Tolerant of junk lines; drops any entry whose line(s) fall outside the source.
def parse_linemap(block: str, num_lines: int) -> dict:
    """Convert the body of a <linemap> block into ``{nodeId: [start, end]}``.

    Each useful line looks like ``NodeId: N`` or ``NodeId: N-M``. Lines that do
    not fit that shape are ignored, a reversed range is put in ascending order,
    and any entry reaching outside ``1..num_lines`` is dropped. With a falsy
    ``num_lines`` nothing is kept.
    """
    entry = re.compile(r'\s*([A-Za-z]\w*)\s*:\s*(\d+)(?:\s*-\s*(\d+))?\s*$')
    mapping: dict = {}
    for candidate in block.strip().splitlines():
        hit = entry.match(candidate)
        if hit is None:
            continue  # junk line
        node_id, first, last = hit.groups()
        start = int(first)
        end = start if not last else int(last)
        low, high = min(start, end), max(start, end)
        # low <= high, so checking the two outer bounds covers both ends
        if not num_lines or low < 1 or high > num_lines:
            continue
        mapping[node_id] = [low, high]
    return mapping
@app.api(name="generate_flowchart")
def generate_flowchart(src_code: str) -> dict:
    """Generate a Mermaid flowchart and a node-to-source-line map for ``src_code``.

    The source is prefixed with ``N| `` line numbers and sent to the local model
    together with a fixed system prompt. The reply is then split into the
    model's <thinking> text (kept only for the trace), the <linemap> block
    (validated by parse_linemap) and the Mermaid text (cleaned by quote_labels).
    One trace record per call is handed to write_trace.

    Args:
        src_code: Raw source code to diagram.

    Returns:
        ``{"mermaid": str, "linemap": {nodeId: [startLine, endLine]}}``.
        Blank input returns an empty diagram and map without calling the model.
    """
    # check if src_code is empty
    if not src_code.strip():
        return {"mermaid": "", "linemap": {}}

    # Number the source lines so the model can cite them in the <linemap> block.
    src_lines = src_code.splitlines()
    num_lines = len(src_lines)
    numbered = "\n".join(f"{i}| {ln}" for i, ln in enumerate(src_lines, 1))

    # Set system prompt.
    # The few-shot example obeys constraint 4 (paraphrased labels, no raw
    # operators or quotes) and its input keeps code indentation after the
    # `N| ` prefix, exactly like the real `numbered` input does.
    system_prompt = dedent("""
        ## Role/Persona
        You are a senior staff software architect and compiler engineer specializing in visual control-flow mapping. Your philosophy is pure utility: you translate raw execution logic into highly accurate, scannable, structural diagrams without any conversational filler, meta-commentary, or stylistic fluff.
        ## Context/Objective
        The user will provide source code files or logic snippets. Your sole objective is to parse the syntax and output a corresponding, valid Mermaid.js flowchart graph. This graph will be rendered natively in a production UI to help developers audit execution paths at a glance.
        ## Strict Constraints
        <constraints>
        1. OUTPUT FORMAT: Output valid, raw Mermaid.js syntax, immediately followed by the required <linemap> block (constraint 5). Nothing else.
        2. NO MARKDOWN FENCING: Do not wrap the output in ```mermaid or ``` blocks. Start directly with the Mermaid graph definition, for example: graph TD.
        3. NO PROSE: Do not include introductory text, explanations, or concluding remarks. If the code cannot be parsed, output an isolated error node.
        4. NODE NAMING: Paraphrase conditions into plain words — never put raw code, operators, quotes, parentheses, or square brackets/subscripts inside labels (write Index in bounds?, not i < len(nums); write Element is even?, not nums[i] % 2 == 0)
        5. SOURCE MAP: The user's code is prefixed with `N| ` line numbers (these are references, never copy the `N| ` prefix into a label). After the diagram, output a <linemap> block: one `NodeId: N` per node, where N is the 1-based source line that node represents (use `NodeId: start-end` for a multi-line construct). Omit purely structural Start/End nodes that correspond to no source line.
        </constraints>
        <banned_vocabulary>
        - Here is the flowchart
        - ```mermaid
        - ```
        - Note:
        - Explanation:
        - In this diagram
        - As requested
        </banned_vocabulary>
        ## Response Workflow
        Before outputting the final diagram syntax, perform structural parsing inside a hidden <thinking> tag according to these steps:
        1. Identify all conditional branches, including if/else, loops, including for/while, and termination points, including return/throw.
        2. Map out the execution flow nodes chronologically.
        3. Verify that every opening bracket and node label matching syntax, including [ ], ( ), and { }, is perfectly balanced and closed according to Mermaid specifications.
        4. Ensure no markdown formatting tags leak past the closing </thinking> tag.
        ## Few-Shot Examples
        Input:
        1| def check_status(val):
        2|     if val > 10:
        3|         return "Active"
        4|     else:
        5|         return "Inactive"
        Output:
        <thinking>
        1. Control structures: One conditional check, two return branches.
        2. Nodes: A Start, B Conditional, C Active return, D Inactive return.
        3. Source lines: def is line 1, the if is line 2, Active return is line 3, Inactive return is line 5.
        </thinking>
        graph TD
        A[Start: check_status] --> B{Value greater than 10?}
        B -- True --> C[Return Active]
        B -- False --> D[Return Inactive]
        <linemap>
        A: 1
        B: 2
        C: 3
        D: 5
        </linemap>
    """).strip()

    # Sampling parameters are named once so the call and the trace cannot drift.
    temperature = 0.1  # Keep it quite deterministic for now
    max_tokens = 1024

    # Reset the cache per request so no cross-request bleeding
    llm.reset()

    # Casting else PyLance gets mad
    t0 = time.perf_counter()
    response = cast(Any, llm.create_chat_completion(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": numbered}
        ],
        temperature=temperature,
        max_tokens=max_tokens,
        stream=False
    ))
    latency_ms = round((time.perf_counter() - t0) * 1000)
    raw = response["choices"][0]["message"]["content"] or ""  # guard a None content
    usage = response.get("usage", {}) or {}

    # Capture the model's hidden reasoning for the trace, then strip the tags.
    # `|\Z` lets an unclosed tag (reply cut off at max_tokens) run to the end of
    # the text instead of leaking "<thinking>..." into the diagram.
    think_pattern = r'<thinking>(.*?)(?:</thinking>|\Z)'
    think = re.search(think_pattern, raw, flags=re.DOTALL)
    reasoning = think.group(1).strip() if think else ""
    content = re.sub(think_pattern, '', raw, flags=re.DOTALL)

    # Extract + strip the node→line map, then validate it against the source length.
    # Same tolerance for a missing </linemap> on a truncated reply.
    linemap: dict = {}
    lm = re.search(r'<linemap>(.*?)(?:</linemap>|\Z)', content, flags=re.DOTALL)
    if lm:
        linemap = parse_linemap(lm.group(1), num_lines)
        content = content[:lm.start()] + content[lm.end():]

    # Drop markdown fence lines (``` / ```mermaid) if the model leaked them
    # despite the prompt; such a line is never part of a Mermaid diagram.
    content = re.sub(r'^[ \t]*```[\w-]*[ \t\r]*$', '', content, flags=re.MULTILINE)

    # Quote-wrap each node label and escape any leaked code characters
    mermaid = quote_labels(content).strip()  # and remove excess whitespace

    # ----- Agent trace (append-only JSONL; downloadable at /traces) -----
    write_trace({
        "id": uuid.uuid4().hex,
        "ts": datetime.now(timezone.utc).isoformat(),
        "event": "generate_flowchart",
        "model": MODEL_NAME,
        "params": {"temperature": temperature, "max_tokens": max_tokens, "n_ctx": 4096},
        "input": {"src_code": src_code, "num_lines": num_lines},
        "reasoning": reasoning,
        "output": {"raw": raw, "mermaid": mermaid, "linemap": linemap},
        "usage": {
            "prompt_tokens": usage.get("prompt_tokens"),
            "completion_tokens": usage.get("completion_tokens"),
            "total_tokens": usage.get("total_tokens"),
        },
        "latency_ms": latency_ms,
        "status": "ok",
    })
    return {"mermaid": mermaid, "linemap": linemap}
# ----- Custom Frontend ----- #
# Served from frontend.html so the same file can be opened directly in a
# browser (file://) to preview the UI without loading the model.
# Read the page once at import time; a missing frontend.html fails startup.
index_html = Path(__file__).with_name("frontend.html").read_text(encoding="utf-8")


# Load the custom HTML
# / takes precedence over the default Blocks UI
@app.get("/")
def index():
    """Return the custom frontend page that was loaded into ``index_html``."""
    return HTMLResponse(content=index_html)
# Serve the vendored frontend assets (Mermaid, CodeMirror bundle, Gradio client,
# fonts) locally so the app needs NO external CDN/API at runtime.
STATIC_DIR = (Path(__file__).parent / "static").resolve()
mimetypes.add_type("text/javascript", ".js")
mimetypes.add_type("font/woff2", ".woff2")


@app.get("/static/{fname:path}")
def static_files(fname: str):
    """Serve one vendored asset from STATIC_DIR.

    Args:
        fname: Path below ``/static/`` taken from the request URL (untrusted).

    Returns:
        A FileResponse with a guessed media type (falling back to
        ``application/octet-stream``), or a plain-text 404 when the path does
        not resolve to a regular file inside STATIC_DIR.
    """
    not_found = PlainTextResponse("not found", status_code=404)
    try:
        fp = (STATIC_DIR / fname).resolve()
    except (OSError, ValueError, RuntimeError):
        # e.g. an embedded NUL byte or a symlink loop in the requested path
        return not_found
    # contain to STATIC_DIR (no path traversal) and require a real file.
    # Comparing Path ancestors avoids hard-coding "/" as the separator, which
    # made every request 404 on platforms that use a different one.
    if STATIC_DIR not in fp.parents or not fp.is_file():
        return not_found
    mt, _ = mimetypes.guess_type(str(fp))
    return FileResponse(fp, media_type=mt or "application/octet-stream")
# Download every agent trace collected this run (one JSON object per line).
# curl https://<your-space>/traces > agent_traces.jsonl
@app.get("/traces")
def traces():
    """Return the whole trace file as a downloadable NDJSON attachment.

    The body is empty when TRACE_PATH does not exist yet.
    """
    if TRACE_PATH.exists():
        body = TRACE_PATH.read_text(encoding="utf-8")
    else:
        body = ""
    download_headers = {"Content-Disposition": 'attachment; filename="agent_traces.jsonl"'}
    return PlainTextResponse(
        body,
        media_type="application/x-ndjson",
        headers=download_headers,
    )
# Start the server. This is a module-level call, so it runs as soon as the
# file is executed or imported, after all routes above have been registered.
app.launch(share=False) # no external gradio.live tunnel — fully self-hosted