"""Minimal FastAPI service exposing a speech-to-text endpoint via faster-whisper."""

import io
import os
import tempfile

from fastapi import FastAPI, File, UploadFile
from fastapi.responses import StreamingResponse
from faster_whisper import WhisperModel

# The model is loaded once at import time and shared by all requests.
model_size = "large-v2"
transcripe_model = WhisperModel(model_size, device="cuda", compute_type="float16")

app = FastAPI()


@app.post("/speech_to_text/")
async def create_upload_file(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return the recognized text.

    The upload is written to a unique temporary file, transcribed with the
    module-level ``transcripe_model`` and the temporary file is removed
    afterwards.

    Args:
        file: The uploaded audio file (multipart form field ``file``).

    Returns:
        The concatenated text of all transcribed segments as a string, or a
        ``{"error": ...}`` dict if a ``PermissionError`` occurs while handling
        the file.
    """
    file_path = None
    try:
        audio_bytes = await file.read()

        # A unique temporary file per request: a single fixed file name would
        # let concurrent uploads overwrite each other's audio.
        # delete=False so the file can be reopened by path on every platform;
        # it is removed explicitly in the ``finally`` clause below.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            file_path = tmp.name
            tmp.write(audio_bytes)

        segments, info = transcripe_model.transcribe(file_path, beam_size=5)
        print("Detected language '%s' with probability %f" % (info.language, info.language_probability))

        # Return the actual transcription (not a placeholder string).
        return "".join(segment.text for segment in segments)
    except PermissionError as e:
        return {"error": f"PermissionError: {str(e)}"}
    finally:
        if file_path is not None:
            try:
                os.remove(file_path)
            except OSError:
                # Best-effort cleanup: a leftover temp file must not turn a
                # successful transcription into a failed request.
                pass