|
|
|
import torch |
|
from fastapi import FastAPI, HTTPException, UploadFile, File |
|
import uvicorn |
|
import numpy as np |
|
import io |
|
from pydub import AudioSegment |
|
import time |
|
import logging |
|
from transformers import WhisperProcessor, WhisperForConditionalGeneration |
|
|
|
# Send INFO-and-above records through the root handler; this module logs
# through its own named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Run inference on the GPU when PyTorch reports one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the processor and the model once at import time so every request
# reuses the same instances; the model is then moved onto ``device``.
processor = WhisperProcessor.from_pretrained("Ivydata/whisper-small-japanese")
model = WhisperForConditionalGeneration.from_pretrained(
    "Ivydata/whisper-small-japanese"
)
model = model.to(device)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def transcribe_whisper(audio_data_bytes):
    """Transcribe MP3-encoded audio bytes to text with the Whisper model.

    The audio is decoded with pydub, normalised to mono 16-bit PCM at the
    sampling rate the feature extractor is configured for, scaled to
    float32 and run through the module-level ``processor`` and ``model``.

    Args:
        audio_data_bytes: Raw bytes of an MP3 file.

    Returns:
        The decoded transcription text of the single input clip.

    Raises:
        HTTPException: Status 400 when the payload is empty; status 500
            (with the underlying error message as ``detail``) when
            decoding, feature extraction or generation fails.
    """
    # Reject empty uploads up front instead of surfacing an opaque decoder
    # error as a server fault.
    if not audio_data_bytes:
        raise HTTPException(status_code=400, detail="Empty audio payload")

    try:
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by wall-clock adjustments.
        start_time = time.perf_counter()
        audio_segment = AudioSegment.from_mp3(io.BytesIO(audio_data_bytes))

        # The Whisper feature extractor rejects audio whose sampling rate
        # differs from the one it was configured with, and it expects a
        # single channel. Without this step, ordinary 44.1/48 kHz MP3s fail
        # and stereo files are fed as interleaved L/R samples.
        # Forcing 2-byte samples makes the 32768 scaling below valid.
        target_rate = processor.feature_extractor.sampling_rate
        audio_segment = (
            audio_segment.set_channels(1)
            .set_frame_rate(target_rate)
            .set_sample_width(2)
        )

        audio_data_int16 = np.array(audio_segment.get_array_of_samples())

        # Map signed 16-bit PCM onto float32 in [-1.0, 1.0).
        audio_data_float32 = audio_data_int16.astype(np.float32) / 32768.0

        # NOTE(review): the feature extractor presumably pads/truncates to
        # Whisper's fixed input window, so long clips may be cut short --
        # confirm whether chunking is needed.
        input_features = processor(
            audio=audio_data_float32,
            sampling_rate=audio_segment.frame_rate,
            return_tensors="pt",
        ).input_features.to(device)

        predicted_ids = model.generate(input_features=input_features)

        result = processor.batch_decode(predicted_ids, skip_special_tokens=True)
        # Defensive: fall back to the string form if the decoder does not
        # return a non-empty list.
        result_text = result[0] if isinstance(result, list) and result else str(result)

        logger.info("Time taken: %s seconds", time.perf_counter() - start_time)
        return result_text
    except Exception as e:
        # Keep the traceback in the server log; the caller only sees the
        # message carried by the HTTPException.
        logger.exception("Transcription failed")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
|
|
|
app = FastAPI()


@app.post("/transcribe")
async def transcribe_endpoint(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return the text as JSON.

    Args:
        file: The uploaded audio file (multipart form field ``file``).

    Returns:
        A dict of the form ``{"result": [{"text": <str>}]}``. On success
        ``text`` is the transcription. If ``transcribe_whisper`` raises
        ``HTTPException``, ``text`` is a fixed Japanese message asking the
        user to try again, returned as a normal response body rather than
        an error status.
    """
    audio_data = await file.read()

    try:
        # NOTE(review): transcribe_whisper is a synchronous call made from
        # an async handler, so it looks like it blocks the event loop for
        # the duration of inference -- consider
        # fastapi.concurrency.run_in_threadpool if concurrent requests
        # matter.
        text = transcribe_whisper(audio_data)
    except HTTPException as exc:
        # Clients only receive a generic message, so record the real cause
        # in the server log instead of dropping it.
        logger.warning(
            "Transcription failed (status %s): %s", exc.status_code, exc.detail
        )
        # "An error occurred, please try again." The previous literal was
        # mojibake (UTF-8 bytes decoded through a Greek code page) and
        # reached clients as garbage.
        text = "エラーが発生しました, もう一度試してください"

    return {"result": [{"text": text}]}
|
|
|
|
|
if __name__ == "__main__":
    # Record the inference device before the server takes over the process.
    startup_message = f"Model loaded on {device}"
    logger.info(startup_message)

    # Listen on every interface, port 7860.
    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)