from fastapi import FastAPI, File, UploadFile
from fastapi.responses import StreamingResponse
from fastapi.responses import FileResponse, HTMLResponse
import os
import io

from gradio_client import Client

# Client bound to the "physician-ai/speech-to-text" Gradio app; the endpoints
# below forward their work to it through client.predict(...).
client = Client("physician-ai/speech-to-text")

# view_api() prints the endpoint summary itself and returns None by default,
# so wrapping it in print() only appended a stray "None" line to the output.
client.view_api()

# ASGI application object that the route decorators below register on.
app = FastAPI()

@app.post("/test/")
async def test():
    """Trivial endpoint that always responds with the literal string "test"."""
    reply = "test"
    return reply

@app.post("/translate/")
async def translate(text, language):
    """Translate ``text`` by calling the remote ``/translate`` endpoint.

    Args:
        text: Text to translate; forwarded unchanged to the remote endpoint.
        language: Language argument forwarded unchanged to the remote endpoint.

    Returns:
        A dict ``{"translated_text": <remote result>}``.
    """
    # Imported locally so this handler does not depend on edits to the
    # file-level import block.
    from fastapi.concurrency import run_in_threadpool

    # client.predict is a synchronous call; running it directly inside an
    # ``async def`` handler would stall the event loop (and every other
    # request) until the remote call returns, so hand it to the thread pool.
    translated = await run_in_threadpool(
        client.predict, text, language, api_name="/translate"
    )
    return {"translated_text": translated}

@app.post("/speech_to_text/")
async def speech_to_text(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file via the remote ``/get_stt`` endpoint.

    The upload is written to a uniquely named temporary ``.mp3`` file, whose
    path is passed to ``client.predict``; the temporary file is removed once
    the call finishes, whether it succeeded or raised.

    Args:
        file: The uploaded audio file (multipart form field ``file``).

    Returns:
        Whatever ``client.predict(..., api_name="/get_stt")`` returns.
    """
    # Local imports keep this handler self-contained.
    import tempfile

    from fastapi.concurrency import run_in_threadpool

    # Use UploadFile's async read instead of the blocking file.file.read().
    audio_bytes = await file.read()

    # A unique temp file per request: the previous fixed "inputvoice.mp3"
    # path was shared by every request/worker and was never cleaned up.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
        tmp.write(audio_bytes)
        file_path = tmp.name
    print("saved")

    try:
        # client.predict is synchronous; run it off the event loop so other
        # requests are not stalled while the remote call is in flight.
        return await run_in_threadpool(
            client.predict, file_path, api_name="/get_stt"
        )
    finally:
        os.remove(file_path)


# Set COQUI_TOS_AGREED before the TTS package is imported/used.
# NOTE(review): presumably this pre-accepts Coqui's terms of service so model
# loading does not stop at an interactive prompt — confirm against TTS docs.
os.environ["COQUI_TOS_AGREED"] = "1"
from TTS.api import TTS

# Ask TTS for its model listing and pick the first entry as the model to load.
# NOTE(review): which model index 0 refers to depends on the installed TTS
# version's listing order — confirm this is the intended model.
model_names = TTS().list_models()
m = model_names[0]
print(model_names)

# Load the selected model at import time; this module-level `tts` object is
# what the text-to-speech endpoint below uses. Both gpu=True and .to("cuda")
# request a CUDA device.
tts = TTS(m, gpu=True)
# Alternative for machines without a CUDA-capable GPU (per the original note:
# "no GPU or Amd"):
#tts.to("cpu") # no GPU or Amd
tts.to("cuda")

@app.get("/text-to-speech/")
def text_to_speech(text, language):
    """Synthesize ``text`` to an audio file and return it in the response.

    When ``language`` is exactly ``"vietnamese"`` the audio is generated with
    gTTS; for any other value the module-level Coqui ``tts`` model is used
    with ``input.wav`` as the speaker reference.

    Args:
        text: The text to speak.
        language: Language name; only ``"vietnamese"`` selects the gTTS path.

    Returns:
        A ``FileResponse`` serving ``text_to_speech.mp3`` as ``audio/mpeg``.
    """
    # Defined before branching: previously it was only assigned in the
    # Vietnamese branch, so the other branch failed at the return statement.
    audio_file = 'text_to_speech.mp3'

    if language == "vietnamese":
        from gtts import gTTS

        # Do not bind the gTTS object to the name `tts`: assigning to `tts`
        # anywhere in this function makes it a local and breaks the else
        # branch with UnboundLocalError. lang="vi" is required, otherwise
        # gTTS reads the Vietnamese text with its default English voice.
        gTTS(text, lang="vi").save(audio_file)
    else:
        # Uses the module-level Coqui model loaded at import time.
        # NOTE(review): `language` is not forwarded to the model here, and the
        # output is written to a .mp3 path and served as audio/mpeg — confirm
        # the loaded model does not need a language and what format it writes.
        tts.tts_to_file(text, speaker_wav="input.wav", file_path=audio_file)

    # NOTE(review): every request writes to the same output path; concurrent
    # requests can overwrite each other's audio before it is sent.
    return FileResponse(audio_file, media_type='audio/mpeg')