from fastapi import FastAPI, File, UploadFile
from fastapi.responses import StreamingResponse
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
import os
import io
import json
import torch
#import httpcore
#setattr(httpcore, 'SyncHTTPTransport', 'AsyncHTTPProxy')


# ASGI application object; the route decorators in this module register on it.
app = FastAPI()

# CORS policy: every origin, method and header is accepted, and credentialed
# requests are allowed.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)


#============speech_to_text=======================================================
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read

import tempfile
import os

# Whisper checkpoint loaded for transcription, and the batch size handed to
# the pipeline on each call.
MODEL_NAME = "openai/whisper-base"
BATCH_SIZE = 8
# Upload-size and YouTube-length limits (3600 s = 1 hour). Neither constant is
# referenced in the visible part of this file.
FILE_LIMIT_MB = 1000
YT_LENGTH_LIMIT_S = 3600

# Device passed to the pipeline: index 0 when CUDA is available, else the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-recognition pipeline, built once at import time and shared by the
# transcription endpoint; audio is processed in 30-second chunks.
pipe = pipeline(
    model=MODEL_NAME,
    task="automatic-speech-recognition",
    device=device,
    chunk_length_s=30,
)
@app.post("/speech_to_text_whispher")
async def speech_to_text_whispher(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with the module-level Whisper pipeline.

    The upload is passed to the pipeline as raw bytes; per the transformers
    ASR pipeline documentation, bytes are decoded with ffmpeg in the same way
    as the contents of a file path. Nothing is written to disk, so concurrent
    requests cannot overwrite each other's audio through a shared file name
    and no file is left behind after the request.

    Args:
        file: Multipart upload holding the audio to transcribe.

    Returns:
        A dict ``{"transcribe": <text>}`` with the transcription.

    Raises:
        HTTPException: 400 when the uploaded file is empty.
    """
    from fastapi import HTTPException

    audio_bytes = await file.read()
    if not audio_bytes:
        raise HTTPException(status_code=400, detail="Uploaded audio file is empty.")

    # NOTE: the pipeline call is synchronous, so the event loop is blocked
    # for the duration of the transcription.
    result = pipe(
        audio_bytes,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": "transcribe"},
        return_timestamps=True,
    )
    return {"transcribe": result["text"]}


#text_to_speech block===========================================================================


import googletrans
from googletrans import Translator
# Shared googletrans client, plus two parallel lists derived from the library's
# language table: ``keys[i]`` is the language code whose name is ``vals[i]``.
translator = Translator()
lan = googletrans.LANGUAGES
keys = list(lan)
vals = [lan[code] for code in keys]

def translate(text, language):
    """Translate ``text`` into ``language`` with the shared googletrans client.

    Args:
        text: Text to translate. No source language is passed to googletrans.
        language: Target language name as it appears in the values of
            ``googletrans.LANGUAGES``. An exact match is tried first; if that
            fails, names are compared ignoring case and surrounding
            whitespace.

    Returns:
        The translated text.

    Raises:
        ValueError: If ``language`` does not match any known language name.
    """
    try:
        # Exact lookup first, so every input that worked before resolves to
        # the same language code as before.
        code = keys[vals.index(language)]
    except ValueError:
        wanted = str(language).strip().casefold()
        code = next(
            (key for key, name in zip(keys, vals) if name.casefold() == wanted),
            None,
        )
        if code is None:
            raise ValueError(f"Unsupported target language: {language!r}") from None
    return translator.translate(text, dest=code).text

# Coqui TTS setup. Everything below runs at import time.
# The environment variable is set before the TTS import and model load;
# presumably it pre-accepts the Coqui terms-of-service prompt — confirm.
os.environ["COQUI_TOS_AGREED"] = "1"
from TTS.api import TTS
# Get device. NOTE: this rebinds the module-level ``device`` that was set
# earlier for the Whisper pipeline (0 or "cpu") to "cuda" or "cpu".
device = "cuda" if torch.cuda.is_available() else "cpu"

# List available 🐸TTS models (constructs a throwaway TTS instance just to
# print the model list).
print(TTS().list_models())

# Init TTS: load the multilingual XTTS v2 model and move it to ``device``.
xtts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)

# Synthesize a fixed English greeting to "output.wav" at import time, using
# "hell fast 20x.mp3" as the speaker reference clip (the file must exist in
# the working directory). Presumably a warm-up / smoke test — confirm.
starting_text = " This is [SDR’s name] with Neural Leap. I saw you schedule a call with us for [Insert day and time] to learn more about our AI engineering services. Does that ring a bell?"
xtts.tts_to_file(starting_text,speaker_wav="hell fast 20x.mp3",language="en",file_path="output.wav")

@app.get("/text-to-speech/")
async def text_to_speech(text, language, mode):
    """Translate ``text`` into ``language`` and return it as synthesized speech.

    Vietnamese is synthesized with gTTS; every other language goes through
    the module-level XTTS model with "hell fast 20x.mp3" as the speaker
    reference clip. Each request writes to its own temporary file, which is
    deleted once the response has been sent, so concurrent requests cannot
    overwrite each other's audio.

    Args:
        text: Source text to translate and speak.
        language: Target language name, as found in the values of
            ``googletrans.LANGUAGES`` (e.g. ``"vietnamese"``).
        mode: ``"both"`` speaks the translation followed by the original
            text; any other value speaks the translation only.

    Returns:
        A ``FileResponse`` streaming the audio: ``audio/mpeg`` for the gTTS
        branch, ``audio/wav`` for the XTTS branch.

    Raises:
        HTTPException: 400 when ``language`` is not a known language name.
    """
    from fastapi import BackgroundTasks, HTTPException

    try:
        lang_code = keys[vals.index(language)]
    except ValueError:
        raise HTTPException(
            status_code=400, detail=f"Unsupported language: {language!r}"
        ) from None

    translated = str(translate(text, language))
    spoken = translated + " " + text if mode == "both" else translated

    use_gtts = language == "vietnamese"
    # gTTS writes MP3 data, XTTS writes WAV; name the file and label the
    # response to match what is actually produced.
    if use_gtts:
        suffix, media_type = ".mp3", "audio/mpeg"
    else:
        suffix, media_type = ".wav", "audio/wav"

    fd, audio_path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    try:
        # NOTE: synthesis is synchronous and blocks the event loop.
        if use_gtts:
            from gtts import gTTS

            # gTTS defaults to an English voice; pass the target language so
            # the Vietnamese text is read with a Vietnamese voice.
            gTTS(spoken, lang=lang_code).save(audio_path)
        else:
            xtts.tts_to_file(
                spoken,
                speaker_wav="hell fast 20x.mp3",
                language=lang_code,
                file_path=audio_path,
            )
    except Exception:
        # Do not leave a stray temp file behind when synthesis fails.
        os.remove(audio_path)
        raise

    # Delete the temp file only after the response body has been sent.
    cleanup = BackgroundTasks()
    cleanup.add_task(os.remove, audio_path)
    return FileResponse(audio_path, media_type=media_type, background=cleanup)