import os

# Set the Hugging Face cache location before transformers is imported,
# since huggingface_hub resolves its cache paths at import time.
os.environ["HF_HOME"] = "/tmp/hf_cache"

import ctranslate2
import soundfile as sf
from fastapi import FastAPI, UploadFile, File
from transformers import WhisperProcessor

app = FastAPI()

print("✅ THIS IS NEW APP.PY VERSION")

# Tokenizer and feature extractor saved locally from the original Whisper checkpoint.
processor = WhisperProcessor.from_pretrained("./whisper_processor")

# CTranslate2 conversion of the Whisper model, quantized to int8 for CPU inference.
# The Whisper class lives in ctranslate2.models, not at the package top level.
model = ctranslate2.models.Whisper("ct2_model", compute_type="int8", device="cpu")
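
# Note: "ct2_model" is assumed to have been produced ahead of time with
# CTranslate2's converter, e.g. (the checkpoint name is only an example):
#
#   ct2-transformers-converter --model openai/whisper-small \
#       --output_dir ct2_model --quantization int8
#
# and "./whisper_processor" with the matching processor saved via
# WhisperProcessor.from_pretrained(...).save_pretrained("./whisper_processor").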


@app.get("/")
def root():
    return {
        "message": "CTranslate2 Whisper API is running.",
        "usage": "POST /transcribe with an audio file (.wav, .mp3, etc.)",
    }


@app.post("/transcribe")
async def transcribe(file: UploadFile = File(...)):
    # Persist the upload so soundfile can read it from disk.
    temp_path = f"/tmp/{file.filename}"
    with open(temp_path, "wb") as f:
        f.write(await file.read())

    # Whisper expects 16 kHz mono audio; the processor raises if the rate differs.
    audio_input, sample_rate = sf.read(temp_path)
    inputs = processor(audio_input, sampling_rate=sample_rate, return_tensors="np")
    features = ctranslate2.StorageView.from_array(inputs.input_features)

    # CTranslate2's Whisper.generate() requires an explicit decoder prompt,
    # built here from the detected language.
    language, _ = model.detect_language(features)[0][0]
    prompt = processor.tokenizer.convert_tokens_to_ids(
        ["<|startoftranscript|>", language, "<|transcribe|>", "<|notimestamps|>"]
    )

    results = model.generate(features, [prompt])
    text = processor.decode(results[0].sequences_ids[0], skip_special_tokens=True)

    return {"text": text}