File size: 2,614 Bytes
af03ede
 
 
 
 
 
 
 
110b876
af03ede
 
 
 
 
 
83733e7
af03ede
 
 
 
4a603d0
af03ede
 
 
 
 
 
 
67c6abd
af03ede
 
83733e7
af03ede
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c794494
af03ede
 
 
 
 
 
 
 
 
 
 
 
 
 
c794494
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# -*- coding: utf-8 -*-
"""main.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/17Umb-Po_5pESiRv3-dcDRyootgqBjjWM
"""

import torch
from transformers import WhisperProcessor, WhisperForConditionalGeneration, pipeline

# Run on the first CUDA device when torch reports one, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Build the speech-recognition pipeline for the model id below. This runs at
# import time, so the model is loaded once and shared by every request.
model_id = "rbcurzon/whisper-small-fil"
pipe = pipeline("automatic-speech-recognition", model=model_id, device=device)

"""**FastAPI**"""

import os
import io
from fastapi import FastAPI, WebSocket, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from google import genai
from google.genai import types

# google-genai client used by translate(). The key is read from the
# GENAI_API_KEY environment variable; os.environ.get yields None if it is unset.
client = genai.Client(api_key=os.environ.get("GENAI_API_KEY")) # Do not share api key

def translate(text, srcLang, tgtLang):
    """Translate ``text`` from ``srcLang`` to ``tgtLang``.

    Issues a single ``generate_content`` request against the
    ``gemini-2.0-flash`` model with a translator system instruction,
    prints the raw response object, and returns its ``text`` attribute.

    Args:
        text: The text to translate.
        srcLang: Name of the source language, interpolated into the prompt.
        tgtLang: Name of the target language, interpolated into the prompt.

    Returns:
        The ``text`` attribute of the model response.
    """
    generation_config = types.GenerateContentConfig(
        system_instruction="You are a professional translator. Do not give explanation.",
    )
    prompt = f"Translate the following from {srcLang} to {tgtLang}. Return nothing but the {tgtLang} translation: {text} "

    reply = client.models.generate_content(
        model="gemini-2.0-flash",
        config=generation_config,
        contents=prompt,
    )
    print(reply)
    return reply.text

import shutil
from pathlib import Path
from tempfile import NamedTemporaryFile
from typing import Annotated

import aiofiles
from fastapi import File, Form, HTTPException, UploadFile

# def save_upload_file_tmp(upload_file: UploadFile) -> Path:


# FastAPI application object; the route handlers in this file register on it
# through the @app.post decorators.
app = FastAPI(
    title="Real-Time Audio Processor",
    description="Process and transcribe audio in real-time using Whisper"
)

@app.post("/translateAudio/")
async def test(file: UploadFile=File(...),
               srcLang: str= Form(...),
               tgtLang: str= Form(...)):
    """Transcribe an uploaded audio file and translate the transcript.

    Args:
        file: Uploaded audio; its raw bytes are passed to the ASR pipeline.
        srcLang: Source language name, forwarded to ``translate``.
        tgtLang: Target language name, forwarded to ``translate``.

    Returns:
        A dict with ``transcribed_text`` (the pipeline's ``text`` output) and
        ``translated_text`` (the value returned by ``translate``).

    Raises:
        HTTPException: 400 when the uploaded file contains no data.
    """
    audio_bytes = await file.read()
    if not audio_bytes:
        # Reject early instead of letting the pipeline fail on empty input.
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty.")

    # The bytes are handed to the pipeline directly. The upload is deliberately
    # not written to disk: saving it under the client-supplied filename would
    # let the client pick the path being written and would leave files behind,
    # and nothing reads such a copy.
    # NOTE(review): pipe() and translate() are called synchronously inside this
    # coroutine, so the event loop is presumably blocked while they run.
    # Transcription language is fixed to Tagalog regardless of srcLang.
    result = pipe(
        audio_bytes,
        max_new_tokens=256,
        chunk_length_s=30,
        batch_size=8,
        generate_kwargs={"task": "transcribe", "language": "tagalog"},
    )
    transcript = result['text']
    translated = translate(transcript, srcLang=srcLang, tgtLang=tgtLang)
    return {"transcribed_text": transcript, "translated_text": translated}


@app.post("/translateText/")
async def test(text: str,
               srcLang: str= Form(...),
               tgtLang: str= Form(...)):
    # Translate `text` from srcLang to tgtLang via translate() and return the
    # translated string unchanged. Input and output are echoed to stdout.
    # NOTE(review): `text` is declared without Form(...), unlike the other two
    # parameters - presumably FastAPI reads it from the query string; confirm
    # that this is intended.
    translated = translate(text, srcLang, tgtLang)
    for label, value in (('Raw: ', text), ('Translated: ', translated)):
        print(label, value)
    return translated