Spaces:

srishtichugh
/

blog2code-api

Running

App Files Files Community

blog2code-api / main.py

srishtichugh

pass model explicitly to all pipeline stages

8f2f756 6 days ago

raw

history blame contribute delete

4.87 kB

	import os, sys, shutil, tempfile, zipfile, asyncio, subprocess
	from pathlib import Path
	from fastapi import FastAPI, UploadFile, File, Form, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import FileResponse

	# Absolute directory containing this file; pipeline subprocesses use it as cwd.
	REPO_ROOT = Path(__file__).parent.resolve()
	# Directory holding the numbered pipeline stage scripts.
	CODES_DIR = REPO_ROOT / "codes"

	app = FastAPI(title="Blog2Code API", version="1.0.0")

	# Comma-separated CORS origins from the ALLOWED_ORIGINS env var ("*" when unset).
	# Each entry is stripped so a value such as "https://a.com, https://b.com"
	# does not produce an origin with a leading space (which would never match),
	# and empty entries left by stray commas are dropped.
	ALLOWED_ORIGINS = [
	    origin.strip()
	    for origin in os.getenv("ALLOWED_ORIGINS", "*").split(",")
	    if origin.strip()
	]
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=ALLOWED_ORIGINS,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	def _run(script: str, args: list, extra_env: dict) -> None:
	    """Run one pipeline stage script as a subprocess and fail loudly on error.

	    Args:
	        script: File name of the stage script inside ``CODES_DIR``.
	        args: Command-line arguments appended after the script path.
	        extra_env: Environment variables layered on top of the current
	            process environment (entries here win on key collisions).

	    Raises:
	        RuntimeError: If the script exits with a non-zero status. The message
	            carries the exit code and the last 2000 characters of the
	            captured stdout and stderr.
	    """
	    cmd = [sys.executable, str(CODES_DIR / script)] + args
	    # ``env`` must be a mapping. Merge the inherited environment with the
	    # overrides; a ``{a, b}`` set literal of two mappings raises TypeError.
	    child_env = {**os.environ, **extra_env}
	    result = subprocess.run(
	        cmd,
	        cwd=str(REPO_ROOT),
	        env=child_env,
	        capture_output=True,
	        text=True,
	    )
	    if result.returncode != 0:
	        raise RuntimeError(
	            f"{script} failed (exit {result.returncode}):\n"
	            f"STDOUT: {result.stdout[-2000:]}\n"
	            f"STDERR: {result.stderr[-2000:]}"
	        )

	@app.get("/health")
	def health():
	    """Liveness endpoint: always answers with a fixed ``ok`` status payload."""
	    payload = {"status": "ok"}
	    return payload

	@app.post("/generate")
	async def generate(
	    url: str = Form(None),
	    file: UploadFile = File(None),
	):
	    """Run the blog-to-code pipeline and return the generated repo as a zip.

	    The blog source is either an uploaded ``file`` or a ``url``; when both are
	    supplied the uploaded file is used. The pipeline stages run as
	    subprocesses in a worker thread so the event loop is not blocked, working
	    inside a per-request temporary directory.

	    Args:
	        url: Form field with the blog URL (surrounding whitespace is ignored).
	        file: Uploaded blog document; its extension is kept, ``.md`` if none.

	    Returns:
	        A ``FileResponse`` streaming ``generated-repo.zip``. The temporary
	        directory is removed after the response has been sent.

	    Raises:
	        HTTPException: 400 when neither ``url`` nor ``file`` is provided;
	            500 when a stage fails or the pipeline produces no output files.
	    """
	    # Imported here so the handler does not depend on edits to the file header.
	    from fastapi import BackgroundTasks

	    # Treat a whitespace-only URL the same as a missing one.
	    url = (url or "").strip()
	    if not url and not file:
	        raise HTTPException(400, "Provide either 'url' or 'file'.")

	    tmp = Path(tempfile.mkdtemp())
	    data_dir = tmp / "data"
	    output_dir = tmp / "output"

	    try:
	        # Created inside the try so a failure here still cleans up ``tmp``.
	        data_dir.mkdir(parents=True)
	        output_dir.mkdir(parents=True)

	        if file:
	            # ``filename`` may be absent on the upload; fall back to ".md".
	            suffix = Path(file.filename or "").suffix or ".md"
	            input_path = tmp / f"blog{suffix}"
	            input_path.write_bytes(await file.read())
	            source_args = ["--input_path", str(input_path)]
	        else:
	            source_args = ["--url", url]

	        provider = os.getenv("PROVIDER", "gemma")
	        # Default model for NVIDIA/Llama — overridable via MODEL env var
	        default_model = "meta/llama-3.3-70b-instruct"
	        model = os.getenv("MODEL", default_model)
	        # The model is passed both via environment and via --model below.
	        extra_env = {"MODEL": model}

	        blog_json = data_dir / "blog_data.json"

	        def run_pipeline():
	            """Execute all pipeline stages sequentially (blocking)."""
	            # Stage 0 – parse blog (no LLM, no --model needed)
	            _run(
	                "0_blog_process.py",
	                source_args + ["--output_json_path", str(blog_json)],
	                extra_env,
	            )

	            if blog_json.exists():
	                blog_json_path = blog_json
	            else:
	                # Fall back to any JSON stage 0 wrote; sorted so the pick
	                # is deterministic when several files are present.
	                candidates = sorted(data_dir.glob("*.json"))
	                if not candidates:
	                    raise RuntimeError("Stage 0: no JSON output found.")
	                blog_json_path = candidates[0]

	            # Stage 1 – planning
	            _run("1_planning.py", [
	                "--blog_json_path", str(blog_json_path),
	                "--output_dir", str(data_dir),
	                "--provider", provider,
	                "--content_type", "blog",
	                "--model", model,
	            ], extra_env)

	            # Stage 1.1 – extract config (no LLM, no --model needed)
	            _run("1_1_extract_config.py", [
	                "--output_dir", str(data_dir),
	            ], extra_env)

	            config_yaml = data_dir / "planning_config.yaml"
	            if not config_yaml.exists():
	                raise RuntimeError("Stage 1.1: planning_config.yaml not found.")

	            # Stage 2 – analysis
	            _run("2_analyzing.py", [
	                "--pdf_json_path", str(blog_json_path),
	                "--output_dir", str(data_dir),
	                "--provider", provider,
	                "--model", model,
	            ], extra_env)

	            # Stage 3 – code generation
	            _run("3_coding.py", [
	                "--pdf_json_path", str(blog_json_path),
	                "--output_dir", str(data_dir),
	                "--output_repo_dir", str(output_dir),
	                "--provider", provider,
	                "--model", model,
	            ], extra_env)

	        # Inside a coroutine the running loop is the right handle;
	        # the blocking pipeline runs on the default thread pool.
	        loop = asyncio.get_running_loop()
	        await loop.run_in_executor(None, run_pipeline)

	        zip_path = tmp / "repo.zip"
	        files = [f for f in output_dir.rglob("*") if f.is_file()]
	        if not files:
	            raise HTTPException(500, "Pipeline produced no output files.")

	        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
	            for f in files:
	                zf.write(f, f.relative_to(output_dir))

	        # The zip must outlive this function while it is streamed, so the
	        # temp dir is removed by a task that runs after the response is sent.
	        cleanup = BackgroundTasks()
	        cleanup.add_task(shutil.rmtree, tmp, ignore_errors=True)

	        return FileResponse(
	            path=str(zip_path),
	            media_type="application/zip",
	            filename="generated-repo.zip",
	            background=cleanup,
	        )

	    except HTTPException:
	        shutil.rmtree(tmp, ignore_errors=True)
	        raise
	    except Exception as exc:
	        shutil.rmtree(tmp, ignore_errors=True)
	        raise HTTPException(500, str(exc)) from exc