Spaces:

rbcurzon
/

speech-to-text

Sleeping

App Files Files Community

rbcurzon committed on Feb 27

Commit

af03ede

verified ·

1 Parent(s): 2ee630d

Upload app.py

Browse files

Files changed (1) hide show

app.py +95 -0

app.py ADDED Viewed

	@@ -0,0 +1,95 @@

+# -*- coding: utf-8 -*-
+"""main.ipynb
+Automatically generated by Colab.
+Original file is located at
+    https://colab.research.google.com/drive/17Umb-Po_5pESiRv3-dcDRyootgqBjjWM
+"""
+!pip install pipeline
+!apt-get install ffmpeg
+from IPython.display import Audio
+import torch
+from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+# load model and processor
+model_id = "rbcurzon/whisper-small-ceb"
+pipe = pipeline("automatic-speech-recognition", model=model_id, device=device)
+"""**FastAPI**"""
+!pip install fastapi['standard'] pyngrok librosa python-multipart ffmpeg aiofiles
+import io
+import librosa
+from fastapi import FastAPI, WebSocket, UploadFile, File
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from google import genai
+from google.genai import types
+client = genai.Client(api_key="AIzaSyBpJlR45qVLWTHE5EVr5xAJ2oAHB-qFpMc") # Do not share api key
+def translate(text, srcLang, tgtLang):
+    sys_instruct = "You are a professional translator."
+    response = client.models.generate_content(
+        model="gemini-2.0-flash",
+        config=types.GenerateContentConfig(
+            system_instruction=sys_instruct),
+        contents=f"Translate the following from {srcLang} to {tgtLang}. Return nothing but the {tgtLang} translation: {text} ",
+    )
+    print(response)
+    return response.text
+import os
+from tempfile import NamedTemporaryFile
+from fastapi import UploadFile, Form, File
+from pathlib import Path
+from typing import Annotated
+import shutil
+import aiofiles
+# def save_upload_file_tmp(upload_file: UploadFile) -> Path:
+app = FastAPI(  # application object: the /test/ route is registered on it and uvicorn.run() serves it
+    title="Real-Time Audio Processor",
+    description="Process and transcribe audio in real-time using Whisper"
+)
+@app.post("/test/")
+async def test(file: UploadFile=File(...),
+               srcLang: str= Form(...),
+               tgtLang: str= Form(...)):
+  # Download audio
+  async with aiofiles.open(file.filename, 'wb') as out_file:
+        content = await file.read()  # async read
+        await out_file.write(content)  # async write
+  result = pipe(content,
+                max_new_tokens=256,
+                chunk_length_s=30,
+                batch_size=8,
+                generate_kwargs={"task": "transcribe", "language": "tagalog"})
+  translatedResult = translate(result['text'], srcLang=srcLang, tgtLang=tgtLang)
+  return {"transcribed_text":result['text'], "translated_text":translatedResult}
+import nest_asyncio
+from pyngrok import ngrok
+import uvicorn
+import numpy as np
+auth_token = "2tAcMI54WtHzQBg2GlUr4wxFtX8_4FWDSjMqCarDhzcLC8mMP"
+ngrok.set_auth_token(auth_token)
+ngrok_tunnel = ngrok.connect(8000)
+print('Public URL:', ngrok_tunnel.public_url)
+nest_asyncio.apply()
+uvicorn.run(app, port=8000)