"""FastAPI service exposing translation, speech-to-text and text-to-speech.

Translation and speech-to-text are delegated to the remote Gradio app
"physician-ai/speech-to-text". Text-to-speech uses gTTS for Vietnamese and a
Coqui TTS model loaded at import time for every other language.
"""
from fastapi import FastAPI, File, UploadFile
from fastapi.responses import StreamingResponse
from fastapi.responses import FileResponse, HTMLResponse
import os
import io
import tempfile

from gradio_client import Client

# Remote Gradio app that serves the "/translate" and "/get_stt" endpoints.
client = Client("physician-ai/speech-to-text")
print(client.view_api())

app = FastAPI()


@app.post("/test/")
async def test():
    """Return the constant string "test"."""
    return "test"


@app.post("/translate/")
async def translate(text: str, language: str):
    """Translate ``text`` via the remote "/translate" endpoint.

    Args:
        text: Text to translate.
        language: Language argument forwarded unchanged to the remote API.

    Returns:
        A dict ``{"translated_text": <remote response>}``.
    """
    # NOTE(review): client.predict is a blocking call inside an async handler.
    respond = client.predict(text, language, api_name="/translate")
    return {"translated_text": respond}


@app.post("/speech_to_text/")
async def speech_to_text(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file via the remote "/get_stt" endpoint.

    Args:
        file: The uploaded audio file.

    Returns:
        Whatever the remote "/get_stt" endpoint returns.
    """
    audio_bytes = await file.read()

    # Each request gets its own temporary file: a single fixed file name would
    # let concurrent uploads overwrite one another before prediction runs.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp.write(audio_bytes)
        upload_path = tmp.name
    print("saved")

    try:
        # NOTE(review): blocking call inside an async handler.
        respond = client.predict(upload_path, api_name="/get_stt")
    finally:
        os.remove(upload_path)
    return respond


# The Coqui terms-of-service flag is set before the TTS package is imported,
# which is why this import is not at the top of the file.
os.environ["COQUI_TOS_AGREED"] = "1"
from TTS.api import TTS

model_names = TTS().list_models()
m = model_names[0]
print(model_names)

# The model is loaded on the GPU; on a machine without CUDA, use
# tts.to("cpu") instead of the call below.
tts = TTS(m, gpu=True)
tts.to("cuda")


@app.get("/text-to-speech/")
def text_to_speech(text: str, language: str):
    """Synthesize ``text`` to speech and return the resulting audio file.

    Args:
        text: Text to speak.
        language: ``"vietnamese"`` selects gTTS; any other value uses the
            module-level Coqui ``tts`` model with ``input.wav`` as the
            speaker reference.

    Returns:
        A ``FileResponse`` serving ``text_to_speech.mp3`` as ``audio/mpeg``.
    """
    # Defined before branching so both paths can return it.
    # NOTE(review): this path is shared by all requests, so concurrent calls
    # can overwrite each other's output.
    audio_file = 'text_to_speech.mp3'

    if language == "vietnamese":
        from gtts import gTTS

        # Must not be named `tts`: assigning that name here would make it a
        # local for the whole function and break the Coqui branch below.
        vietnamese_tts = gTTS(text, lang="vi")
        vietnamese_tts.save(audio_file)
    else:
        # NOTE(review): Coqui presumably writes WAV data even though the file
        # is named .mp3 and served as audio/mpeg - confirm.
        tts.tts_to_file(text, speaker_wav="input.wav", file_path=audio_file)

    return FileResponse(audio_file, media_type='audio/mpeg')