rbcurzon committed on
Commit
af03ede
·
verified ·
1 Parent(s): 2ee630d

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# -*- coding: utf-8 -*-
"""main.ipynb

Automatically generated by Colab.

Original file is located at
https://colab.research.google.com/drive/17Umb-Po_5pESiRv3-dcDRyootgqBjjWM
"""

# NOTE(review): the original notebook cells used IPython shell magics here
# (`!pip install pipeline`, `!apt-get install ffmpeg`). Those are not valid
# Python in a .py file and crash on import — declare the Python dependencies
# in requirements.txt and ffmpeg in packages.txt (or the deployment image)
# instead.

from IPython.display import Audio

import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline

# Use the first GPU when available; Whisper inference on CPU is much slower.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load the fine-tuned Cebuano Whisper checkpoint as a ready-to-use
# automatic-speech-recognition pipeline (downloads weights on first run).
model_id = "rbcurzon/whisper-small-ceb"
pipe = pipeline("automatic-speech-recognition", model=model_id, device=device)
23
+
24
+ """**FastAPI**"""
25
+
26
+ !pip install fastapi['standard'] pyngrok librosa python-multipart ffmpeg aiofiles
27
+
28
+ import io
29
+ import librosa
30
+ from fastapi import FastAPI, WebSocket, UploadFile, File
31
+ from fastapi.middleware.cors import CORSMiddleware
32
+ from pydantic import BaseModel
33
+ from google import genai
34
+ from google.genai import types
35
+
36
+ client = genai.Client(api_key="AIzaSyBpJlR45qVLWTHE5EVr5xAJ2oAHB-qFpMc") # Do not share api key
37
+
38
def translate(text, srcLang, tgtLang):
    """Translate *text* from srcLang to tgtLang with the Gemini API.

    Parameters:
        text: the string to translate (here, a Whisper transcript).
        srcLang: human-readable source language name used in the prompt.
        tgtLang: human-readable target language name used in the prompt.

    Returns the model's response text, which the prompt instructs to be
    nothing but the translation. Propagates any exception raised by the
    genai client on API failure.
    """
    sys_instruct = "You are a professional translator."
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        config=types.GenerateContentConfig(
            system_instruction=sys_instruct),
        contents=f"Translate the following from {srcLang} to {tgtLang}. Return nothing but the {tgtLang} translation: {text} ",
    )
    # Removed the stray debug `print(response)` — it dumped the full API
    # response object (metadata included) into server logs on every call.
    return response.text
48
+
49
+ import os
50
+ from tempfile import NamedTemporaryFile
51
+ from fastapi import UploadFile, Form, File
52
+ from pathlib import Path
53
+ from typing import Annotated
54
+ import shutil
55
+ import aiofiles
56
+
57
+ # def save_upload_file_tmp(upload_file: UploadFile) -> Path:
58
+
59
+
60
# FastAPI application hosting the transcription/translation endpoints.
app = FastAPI(
    title="Real-Time Audio Processor",
    description="Process and transcribe audio in real-time using Whisper",
)
64
+
65
+ @app.post("/test/")
66
+ async def test(file: UploadFile=File(...),
67
+ srcLang: str= Form(...),
68
+ tgtLang: str= Form(...)):
69
+ # Download audio
70
+ async with aiofiles.open(file.filename, 'wb') as out_file:
71
+ content = await file.read() # async read
72
+ await out_file.write(content) # async write
73
+
74
+ result = pipe(content,
75
+ max_new_tokens=256,
76
+ chunk_length_s=30,
77
+ batch_size=8,
78
+ generate_kwargs={"task": "transcribe", "language": "tagalog"})
79
+ translatedResult = translate(result['text'], srcLang=srcLang, tgtLang=tgtLang)
80
+ return {"transcribed_text":result['text'], "translated_text":translatedResult}
81
+
82
import os

import nest_asyncio
from pyngrok import ngrok
import uvicorn
import numpy as np

# SECURITY: never commit auth tokens to source. The original embedded a
# literal ngrok token (it must be revoked); read it from the environment.
auth_token = os.environ["NGROK_AUTH_TOKEN"]

ngrok.set_auth_token(auth_token)

# Expose local port 8000 through a public ngrok tunnel so the
# Colab-hosted API is reachable from the outside.
ngrok_tunnel = ngrok.connect(8000)
print('Public URL:', ngrok_tunnel.public_url)
nest_asyncio.apply()  # permit uvicorn's event loop inside the notebook's running loop
uvicorn.run(app, port=8000)
95
+