"""Voice2Text API: audio transcription (Whisper) + summarization (BART) over FastAPI."""

import os
import tempfile
from typing import Optional

import whisper
import nltk
from nltk.tokenize import sent_tokenize
from fastapi import FastAPI, UploadFile, File, Form, HTTPException, Query
from fastapi.middleware.cors import CORSMiddleware
from transformers import pipeline

# ---------------- Config Paths ----------------
# Colab mounts user space under /content; elsewhere fall back to /tmp so the
# service can run without write access outside temp storage.
RUNNING_IN_COLAB = os.path.exists("/content")
BASE_DIR = "/content" if RUNNING_IN_COLAB else "/tmp"

CACHE_DIR = os.path.join(BASE_DIR, "huggingface")
UPLOAD_DIR = os.path.join(BASE_DIR, "uploads")
NLTK_DIR = os.path.join(BASE_DIR, "nltk_data")

os.makedirs(CACHE_DIR, exist_ok=True)
os.makedirs(UPLOAD_DIR, exist_ok=True)
os.makedirs(NLTK_DIR, exist_ok=True)

# Point every HF/cache consumer at our writable cache directory.
os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
os.environ["HF_HOME"] = CACHE_DIR
os.environ["XDG_CACHE_HOME"] = CACHE_DIR

nltk.data.path.append(NLTK_DIR)

# ---------------- NLTK Setup ----------------
# Download sentence tokenizer data on first run only (both the legacy "punkt"
# and the newer "punkt_tab" resource used by recent NLTK versions).
for _resource in ("punkt", "punkt_tab"):
    try:
        nltk.data.find(f"tokenizers/{_resource}")
    except LookupError:
        nltk.download(_resource, download_dir=NLTK_DIR, quiet=True)

# ---------------- Load Models ----------------
# Loaded once at import time; both models are reused across requests.
asr_model = whisper.load_model("base")
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# ---------------- Shared Helpers / Constants ----------------
# Fallback text used when no audio is available to transcribe.
SAMPLE_TEXT = """
Artificial Intelligence and Machine Learning are transforming industries by enabling automation, advanced
analytics, and data-driven decision making. These technologies are particularly useful in healthcare, finance, and education.
"""


def _summarize_long_text(text: str) -> str:
    """Summarize *text* of arbitrary length.

    BART has a limited input window, so the text is split into sentences,
    grouped into 3-sentence chunks, each chunk summarized independently,
    and the partial summaries joined.

    Args:
        text: Non-empty transcription text.

    Returns:
        The concatenated, stripped summary.
    """
    sentences = sent_tokenize(text)
    chunks = [" ".join(sentences[i:i + 3]) for i in range(0, len(sentences), 3)]
    summarized_text = " ".join(
        summarizer(chunk, max_length=60, min_length=25, do_sample=False)[0]["summary_text"]
        for chunk in chunks
    )
    return summarized_text.strip()


# ---------------- FastAPI App ----------------
app = FastAPI(
    title="Voice2Text API",
    description="Audio Transcription + Summarization + Default Audio Processing",
    version="2.4.0"
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# ---------------- Routes ----------------
@app.post("/transcribe/")
async def transcribe_audio(
    file: Optional[UploadFile] = File(None),
    summary: Optional[bool] = Form(True)
):
    """Transcribe uploaded audio or default audio if no file provided.

    Args:
        file: Optional uploaded audio file; when absent, the server-side
            ``default_audio.wav`` is used instead.
        summary: When true, also return a chunked summary of the transcript.

    Raises:
        HTTPException: 404 if no file was uploaded and the default audio is
            missing; 500 on any other processing failure.
    """
    tmp_path = None
    try:
        if file is not None:
            # Persist the upload to a temp file so Whisper can read it by path.
            with tempfile.NamedTemporaryFile(delete=False, dir=UPLOAD_DIR, suffix=".wav") as tmp:
                content = await file.read()
                tmp.write(content)
                tmp_path = tmp.name
        else:
            # Fall back to the server's default audio sample.
            default_audio_path = os.path.join(UPLOAD_DIR, "default_audio.wav")
            if not os.path.exists(default_audio_path):
                raise HTTPException(
                    status_code=404,
                    detail="Default audio file not found on server."
                )
            tmp_path = default_audio_path

        # Transcribe
        result = asr_model.transcribe(tmp_path)
        transcription = result.get("text", "")

        # Summarize if requested and there is anything to summarize.
        if summary and transcription.strip():
            return {
                "transcription": transcription,
                "summary": _summarize_long_text(transcription),
            }
        return {"transcription": transcription}
    except HTTPException:
        # Re-raise deliberate HTTP errors (e.g. the 404 above) unchanged
        # instead of masking them as a 500.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Processing failed: {e}")
    finally:
        # Only delete temp files we created; never remove the default audio.
        if file is not None and tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)


@app.get("/auto_summarize/")
async def auto_summarize():
    """Automatically summarize default audio if exists, else summarize sample text."""
    default_audio_path = os.path.join(UPLOAD_DIR, "default_audio.wav")

    if os.path.exists(default_audio_path):
        # Default audio exists → transcribe + summarize
        result = asr_model.transcribe(default_audio_path)
        transcription = result.get("text", "")

        if transcription.strip():
            return {
                "transcription": transcription,
                "summary": _summarize_long_text(transcription),
            }
        return {"transcription": "", "summary": ""}

    # Default audio missing → summarize sample text
    summary = summarizer(SAMPLE_TEXT, max_length=100, min_length=25, do_sample=False)
    return {"transcription": None, "summary": summary[0]["summary_text"]}


@app.get("/summarize_direct/")
async def summarize_direct():
    """Direct URL hit → summarizes default sample text."""
    summary = summarizer(SAMPLE_TEXT, max_length=100, min_length=25, do_sample=False)
    return {"summary": summary[0]["summary_text"]}


@app.get("/summarize_text/")
async def summarize_text(text: str = Query(..., description="Text to summarize")):
    """Summarize custom text passed as query parameter.

    Raises:
        HTTPException: 400 if the supplied text is empty or whitespace-only.
    """
    if not text.strip():
        raise HTTPException(status_code=400, detail="Text input cannot be empty.")
    summary = summarizer(text, max_length=120, min_length=30, do_sample=False)
    return {"summary": summary[0]["summary_text"]}