from fastapi import FastAPI, File, UploadFile
from fastapi.responses import StreamingResponse
import os
import io
from faster_whisper import WhisperModel

# Load the faster-whisper model once at startup (GPU, float16 weights)
model_size = "large-v2"
transcribe_model = WhisperModel(model_size, device="cuda", compute_type="float16")

app = FastAPI()

@app.post("/speech_to_text/")
async def create_upload_file(file: UploadFile = File(...)):
    try:
        # Save the uploaded audio under a fixed name on disk
        file_path = "inputvoice.mp3"
        with open(file_path, "wb") as f:
            f.write(await file.read())

        # Read the saved file back (only needed for the optional
        # streaming response below)
        # with open(file_path, "rb") as f:
        #     file_content = f.read()

        # Transcribe the saved file with faster-whisper
        segments, info = transcribe_model.transcribe(file_path, beam_size=5)
        print("Detected language '%s' with probability %f" % (info.language, info.language_probability))

        # Concatenate the text of all segments into one string
        result = ""
        for segment in segments:
            result += segment.text

        # Alternatively, return the audio itself as a streaming response:
        # return StreamingResponse(io.BytesIO(file_content), media_type="audio/mpeg",
        #                          headers={"Content-Disposition": "inline; filename=inputvoice.mp3"})

        # Return the transcribed text
        return result
    except PermissionError as e:
        return {"error": f"PermissionError: {str(e)}"}
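
# A minimal usage sketch, assuming the app is served locally with uvicorn on
# port 8000 and that a file named "sample.mp3" exists; the host, port, and
# filename are illustrative assumptions, not part of this app.
#
#   Start the server:
#       uvicorn main:app --host 0.0.0.0 --port 8000
#
#   Example client (requires the `requests` package):
#       import requests
#
#       with open("sample.mp3", "rb") as audio:
#           response = requests.post(
#               "http://localhost:8000/speech_to_text/",
#               files={"file": ("sample.mp3", audio, "audio/mpeg")},
#           )
#       print(response.json())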