Spaces:

AryanSingh04
/

video-summ-api

Sleeping

App Files Files Community

video-summ-api / app.py

AryanSingh04

Update app.py

2d93393 verified 20 days ago

raw

history blame contribute delete

4.7 kB

	import os
	import tempfile
	import subprocess
	from typing import Tuple

	from fastapi import FastAPI, File, UploadFile
	from fastapi.middleware.cors import CORSMiddleware
	from fastapi.responses import JSONResponse

	# --- Put caches in writable paths for Spaces BEFORE any HF imports ---
	os.environ.setdefault("HF_HOME", "/tmp/huggingface")
	os.environ.setdefault("XDG_CACHE_HOME", "/tmp")

	from faster_whisper import WhisperModel
	from transformers import pipeline

	app = FastAPI(title="Video → Title & Summary (Open Source)")

	# CORS so a browser front end (e.g. a React app) can call this API.
	# Allowed origins come from the CORS_ALLOW_ORIGINS environment variable
	# (comma-separated list). When it is unset or empty, every origin is allowed.
	_cors_origins = [
	    origin.strip()
	    for origin in os.getenv("CORS_ALLOW_ORIGINS", "*").split(",")
	    if origin.strip()
	] or ["*"]

	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=_cors_origins,
	    # A wildcard origin must not be combined with credentialed requests,
	    # so credentials are only enabled once explicit origins are configured.
	    allow_credentials="*" not in _cors_origins,
	    allow_methods=["*"],
	    allow_headers=["*"],
	)

	# --------- Model loading (executed once, at import time) ---------
	print("Loading models...")

	# Speech-to-text: the Whisper checkpoint name is read from the WHISPER_SIZE
	# environment variable (e.g. tiny | base | small) and defaults to "small".
	WHISPER_SIZE = os.environ.get("WHISPER_SIZE", "small")
	# Inference runs on CPU with int8 compute.
	whisper_model = WhisperModel(WHISPER_SIZE, compute_type="int8", device="cpu")

	# Transcript -> summary.
	summarizer = pipeline(
	    task="summarization",
	    model="philschmid/bart-large-cnn-samsum",
	)

	# Summary -> short title (small FLAN-T5 checkpoint; a larger one such as
	# flan-t5-base can be substituted if the hardware allows).
	title_gen = pipeline(
	    task="text2text-generation",
	    model="google/flan-t5-small",
	)

	print("Models loaded.")


	# --------- Helpers ---------
	def extract_audio_wav_16k_mono(video_path: str) -> str:
	    """Extract the audio track of a video as a 16 kHz mono 16-bit PCM WAV.

	    The conversion is done by the system ``ffmpeg`` binary.

	    Args:
	        video_path: Path of the input video file.

	    Returns:
	        Path of a newly created temporary ``.wav`` file. The caller owns the
	        file and is responsible for deleting it.

	    Raises:
	        subprocess.CalledProcessError: ffmpeg exited with a non-zero status
	            (its stderr is available on the exception).
	        OSError: ffmpeg could not be started (e.g. it is not installed).
	    """
	    # Reserve a unique output path; ffmpeg overwrites the empty placeholder
	    # because of the "-y" flag.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        audio_path = tmp.name

	    cmd = [
	        "ffmpeg",
	        "-y",
	        "-i", video_path,
	        "-vn",
	        "-acodec", "pcm_s16le",
	        "-ar", "16000",
	        "-ac", "1",
	        audio_path,
	    ]
	    try:
	        subprocess.run(
	            cmd,
	            stdin=subprocess.DEVNULL,  # never let ffmpeg wait on our stdin
	            stdout=subprocess.PIPE,
	            stderr=subprocess.PIPE,
	            check=True,
	        )
	    except BaseException:
	        # On failure the caller never learns the output path, so the
	        # placeholder file has to be removed here or it would leak.
	        try:
	            os.remove(audio_path)
	        except OSError:
	            pass
	        raise
	    return audio_path


	def safe_trim(text: str, max_chars: int) -> str:
	    """Shorten *text* to at most *max_chars* characters, cutting at a word boundary.

	    Leading/trailing whitespace is stripped first. Text that already fits is
	    returned unchanged; otherwise it is cut and ``"..."`` is appended, with
	    the ellipsis counted against ``max_chars`` so the result never exceeds
	    the limit.

	    Args:
	        text: Input string; ``None`` or empty is treated as ``""``.
	        max_chars: Maximum length of the returned string.

	    Returns:
	        The stripped text, or a shortened version no longer than
	        ``max_chars`` characters.
	    """
	    text = (text or "").strip()
	    if len(text) <= max_chars:
	        return text

	    ellipsis = "..."
	    budget = max_chars - len(ellipsis)
	    if budget <= 0:
	        # No room for any text plus an ellipsis: hard cut without one.
	        return text[:max(max_chars, 0)]

	    head = text[:budget]
	    # Drop a trailing partial word only when the cut really falls inside
	    # one; if there is no space at all, keep the hard cut.
	    if not text[budget].isspace():
	        prefix, sep, _partial = head.rpartition(" ")
	        if sep:
	            head = prefix
	    return head.rstrip() + ellipsis


	def summarize_and_title(transcript: str) -> Tuple[str, str]:
	    """Produce a title and a summary for a transcript.

	    The transcript is first shortened with ``safe_trim``, summarized with the
	    module-level ``summarizer`` pipeline, and the summary is then turned into
	    a short title with the module-level ``title_gen`` pipeline.

	    Args:
	        transcript: Full transcript text.

	    Returns:
	        A ``(title, summary)`` tuple of strings.
	    """
	    # Character-based pre-trim keeps the input small; it is only a rough
	    # guard because character count does not bound token count.
	    trimmed = safe_trim(transcript, 4000)

	    # Summary. truncation=True lets the tokenizer clip anything that still
	    # exceeds the model's maximum input length instead of failing on it.
	    summary = summarizer(
	        trimmed,
	        max_length=140,
	        min_length=40,
	        do_sample=False,
	        truncation=True,
	    )[0]["summary_text"].strip()

	    # Title
	    title_prompt = (
	        "Write a short, catchy YouTube-style title (<= 8 words) for this summary:\n"
	        f"{summary}"
	    )
	    title = title_gen(
	        title_prompt,
	        max_new_tokens=16,
	        num_return_sequences=1,
	        truncation=True,
	    )[0]["generated_text"].strip()

	    # Tidying: no quotes, single line, bounded length.
	    title = title.replace('"', "").replace("\n", " ").strip()
	    title = safe_trim(title, 80)
	    if not title:
	        # The generator can return an empty string; fall back to the start
	        # of the summary so callers always get a usable title.
	        title = safe_trim(summary, 80)

	    return title, summary


	# --------- API ---------
	@app.post("/process_video")
	async def process_video(file: UploadFile = File(...)):
	    """Turn an uploaded video into a title and a summary.

	    Accepts a video under form field 'file'. The upload is spooled to a
	    temporary file, its audio is extracted with ffmpeg, transcribed, and the
	    transcript is summarized and titled. Temporary files are removed before
	    the response is returned.

	    Returns:
	        JSONResponse with
	        * 200 ``{"title": str, "summary": str}`` on success,
	        * 400 ``{"error": ...}`` when the transcript is empty,
	        * 500 ``{"error": "ffmpeg failed", "detail": ...}`` when ffmpeg fails,
	        * 500 ``{"error": ...}`` for any other failure.
	    """
	    tmp_video = None
	    tmp_audio = None
	    try:
	        # Only reuse the client-supplied extension when it is short and
	        # alphanumeric; anything else falls back to ".mp4".
	        suffix = os.path.splitext(file.filename or "")[1]
	        if not (1 < len(suffix) <= 10 and suffix[1:].isalnum()):
	            suffix = ".mp4"

	        # Save the upload in 1 MiB chunks so a large video is never held
	        # in memory in one piece.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tv:
	            tmp_video = tv.name
	            while True:
	                chunk = await file.read(1024 * 1024)
	                if not chunk:
	                    break
	                tv.write(chunk)

	        # NOTE(review): the steps below are synchronous calls inside an
	        # async handler, so they occupy the event loop for the whole
	        # request - consider off-loading them if concurrency matters.

	        # Extract audio with ffmpeg
	        tmp_audio = extract_audio_wav_16k_mono(tmp_video)

	        # Transcribe (auto language). You can force English via language="en"
	        segments, _info = whisper_model.transcribe(tmp_audio)
	        transcript = " ".join(seg.text for seg in segments).strip()

	        if not transcript:
	            return JSONResponse({"error": "No speech detected in the video."}, status_code=400)

	        # Summarize + Title
	        title, summary = summarize_and_title(transcript)

	        return JSONResponse({"title": title, "summary": summary})

	    except subprocess.CalledProcessError as e:
	        # ffmpeg's own stderr explains the failure better than the generic
	        # "returned non-zero exit status" text; report its tail if present.
	        stderr = e.stderr or b""
	        if isinstance(stderr, bytes):
	            stderr = stderr.decode("utf-8", errors="replace")
	        detail = stderr.strip()[-500:] or str(e)
	        return JSONResponse({"error": "ffmpeg failed", "detail": detail}, status_code=500)
	    except Exception as e:
	        # Top-level boundary: report any other failure as a JSON 500.
	        return JSONResponse({"error": str(e)}, status_code=500)
	    finally:
	        # Best-effort cleanup; only filesystem errors are ignored.
	        for p in (tmp_audio, tmp_video):
	            if p:
	                try:
	                    os.remove(p)
	                except OSError:
	                    pass


	@app.get("/")
	def root():
	    """Landing endpoint: confirm the service is up and point to the upload route and docs."""
	    usage_hint = "POST a video to /process_video with form field 'file'."
	    return dict(ok=True, message=usage_hint, docs="/docs")