# blog2code-api / main.py
# srishtichugh's picture
# pass model explicitly to all pipeline stages
# 8f2f756
import asyncio
import os
import shutil
import subprocess
import sys
import tempfile
import zipfile
from pathlib import Path

from fastapi import (
    BackgroundTasks,
    FastAPI,
    File,
    Form,
    HTTPException,
    UploadFile,
)
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
# Absolute directory containing this file; pipeline stages run with it as cwd.
REPO_ROOT = Path(__file__).parent.resolve()
# Directory that holds the pipeline stage scripts launched by `_run`.
CODES_DIR = REPO_ROOT / "codes"

app = FastAPI(title="Blog2Code API", version="1.0.0")

# Comma-separated list of allowed CORS origins; defaults to "*" (any origin).
# Each entry is stripped so values such as "https://a.com, https://b.com"
# match correctly, and empty entries (e.g. from a trailing comma) are dropped.
ALLOWED_ORIGINS = [
    origin.strip()
    for origin in os.getenv("ALLOWED_ORIGINS", "*").split(",")
    if origin.strip()
]

app.add_middleware(
    CORSMiddleware,
    allow_origins=ALLOWED_ORIGINS,
    allow_methods=["*"],
    allow_headers=["*"],
)
def _run(script: str, args: list, extra_env: dict) -> None:
    """Run one pipeline stage script in a child Python interpreter.

    The script is resolved under ``CODES_DIR`` and executed with
    ``REPO_ROOT`` as the working directory. The child's environment is the
    current process environment overlaid with ``extra_env``.

    Args:
        script: File name of the stage script inside ``CODES_DIR``.
        args: Command-line arguments appended after the script path.
        extra_env: Environment variables that override inherited ones.

    Raises:
        RuntimeError: If the script exits with a non-zero status. The message
            carries the exit code plus the last 2000 characters of the
            captured stdout and stderr.
    """
    script_path = CODES_DIR / script
    command = [sys.executable, str(script_path)] + args

    child_env = dict(os.environ)
    child_env.update(extra_env)

    completed = subprocess.run(
        command,
        cwd=str(REPO_ROOT),
        env=child_env,
        capture_output=True,
        text=True,
    )
    if completed.returncode == 0:
        return

    # Only the tail of each stream is reported to keep the message bounded.
    stdout_tail = completed.stdout[-2000:]
    stderr_tail = completed.stderr[-2000:]
    raise RuntimeError(
        f"{script} failed (exit {completed.returncode}):\n"
        f"STDOUT: {stdout_tail}\n"
        f"STDERR: {stderr_tail}"
    )
# Liveness probe: always answers with a fixed {"status": "ok"} payload.
# Documented with comments rather than a docstring because FastAPI publishes
# a handler's docstring as the route description in the OpenAPI schema.
@app.get("/health")
def health():
    payload = {"status": "ok"}
    return payload
@app.post("/generate")
async def generate(
    url: str = Form(None),
    file: UploadFile = File(None),
):
    """Generate a code repository from a blog post and return it as a zip.

    Supply the blog either as a ``url`` form field or as an uploaded
    ``file``. When both are sent, the uploaded file is used.
    """
    # Normalise once so a whitespace-only URL is treated as missing instead
    # of being forwarded to stage 0 as an empty string.
    url = url.strip() if url else ""
    if not url and not file:
        raise HTTPException(400, "Provide either 'url' or 'file'.")

    # Per-request scratch directory holding the input, intermediate data and
    # the generated repository.
    tmp = Path(tempfile.mkdtemp())
    # True once removal of `tmp` has been handed over to the response's
    # background task; until then the `finally` clause below removes it.
    cleanup_deferred = False
    try:
        data_dir = tmp / "data"
        output_dir = tmp / "output"
        data_dir.mkdir(parents=True)
        output_dir.mkdir(parents=True)

        if file:
            # The upload may carry no filename; only its extension is reused,
            # falling back to Markdown.
            suffix = Path(file.filename or "").suffix or ".md"
            input_path = tmp / f"blog{suffix}"
            input_path.write_bytes(await file.read())
            source_args = ["--input_path", str(input_path)]
        else:
            source_args = ["--url", url]

        provider = os.getenv("PROVIDER", "gemma")
        # Default model for NVIDIA/Llama — overridable via MODEL env var
        default_model = "meta/llama-3.3-70b-instruct"
        model = os.getenv("MODEL", default_model)
        extra_env = {"MODEL": model}
        blog_json = data_dir / "blog_data.json"

        def run_pipeline():
            # Runs the blocking stage subprocesses one after another; executed
            # in a worker thread so the event loop stays responsive.

            # Stage 0 – parse blog (no LLM, no --model needed)
            _run("0_blog_process.py",
                 source_args + ["--output_json_path", str(blog_json)],
                 extra_env)
            if blog_json.exists():
                blog_json_path = blog_json
            else:
                # Fall back to any JSON left in data_dir; sorted so the
                # choice is deterministic when several files are present.
                candidates = sorted(data_dir.glob("*.json"))
                if not candidates:
                    raise RuntimeError("Stage 0: no JSON output found.")
                blog_json_path = candidates[0]

            # Stage 1 – planning
            _run("1_planning.py", [
                "--blog_json_path", str(blog_json_path),
                "--output_dir", str(data_dir),
                "--provider", provider,
                "--content_type", "blog",
                "--model", model,
            ], extra_env)

            # Stage 1.1 – extract config (no LLM, no --model needed)
            _run("1_1_extract_config.py", [
                "--output_dir", str(data_dir),
            ], extra_env)
            config_yaml = data_dir / "planning_config.yaml"
            if not config_yaml.exists():
                raise RuntimeError("Stage 1.1: planning_config.yaml not found.")

            # Stage 2 – analysis
            _run("2_analyzing.py", [
                "--pdf_json_path", str(blog_json_path),
                "--output_dir", str(data_dir),
                "--provider", provider,
                "--model", model,
            ], extra_env)

            # Stage 3 – code generation
            _run("3_coding.py", [
                "--pdf_json_path", str(blog_json_path),
                "--output_dir", str(data_dir),
                "--output_repo_dir", str(output_dir),
                "--provider", provider,
                "--model", model,
            ], extra_env)

        # get_running_loop() is the supported way to reach the loop from
        # inside a coroutine.
        loop = asyncio.get_running_loop()
        await loop.run_in_executor(None, run_pipeline)

        files = [f for f in output_dir.rglob("*") if f.is_file()]
        if not files:
            raise HTTPException(500, "Pipeline produced no output files.")

        zip_path = tmp / "repo.zip"
        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
            for f in files:
                # Store paths relative to the repo root inside the archive.
                zf.write(f, f.relative_to(output_dir))

        # The zip is read from disk while the response is being sent, so the
        # scratch directory can only be removed afterwards.
        cleanup = BackgroundTasks()
        cleanup.add_task(shutil.rmtree, tmp, ignore_errors=True)
        response = FileResponse(
            path=str(zip_path),
            media_type="application/zip",
            filename="generated-repo.zip",
            background=cleanup,
        )
        cleanup_deferred = True
        return response
    except HTTPException:
        raise
    except Exception as exc:
        # NOTE(review): the detail may include stage stdout/stderr tails from
        # _run; confirm that exposing them to clients is intended.
        raise HTTPException(500, str(exc)) from exc
    finally:
        if not cleanup_deferred:
            shutil.rmtree(tmp, ignore_errors=True)