from vosk import Model, KaldiRecognizer  # оффлайн-распознавание от Vosk
from vosk_tts import Model, Synth
import speech_recognition  # распознавание пользовательской речи (Speech-To-Text)
import wave  # создание и чтение аудиофайлов формата wav
import json  # работа с json-файлами и json-строками
import os  # работа с файловой системой
import requests
import IPython
from pydub import AudioSegment
from pydub.playback import play
import urllib.request

PATH_TO_MODEL = "vosk-model-tts-ru-0.4-multi"
PATH_TO_OUTPUT = "C:/Users/user/Desktop/deepfake_sirius/materials/audio" #TODO: IT


k = "sk-YOVNQzHmpga9My3dwlSo9BQN907TuPZQXcHn50ztigTwm3I2"
files = [
    ("input_face", open("C:\\Users\\user\\Desktop\\deepfake_sirius\\materials\\scale_1200.jpg", "rb")), #TODO: IT
    ("input_audio", open("C:\\Users\\user\\Desktop\\deepfake_sirius\\materials\\audio\\output.wav", "rb")), #TODO: IT
]
payload = {}


class VoiceGenerator:
    def __init__(self):
        self.model = Model(model_path=PATH_TO_MODEL)
    def generate(self, text, file_name='output.wav'):
        synth = Synth(self.model)
        path = os.path.join(PATH_TO_OUTPUT, file_name)
        synth.synth(text, path)
        return path

def record_and_recognize_audio(*args: tuple):
    # with open("microphone-results.wav", "wb") as file:
    # file.write(audio.get_wav_data())
    try:
        recognized_data = recognizer.recognize_google(audio, language="ru").lower()
    except speech_recognition.UnknownValueError:
        pass
    except speech_recognition.RequestError:
        recognized_data = use_offline_recognition()
    return recognized_data


def use_offline_recognition():
    """
    Переключение на оффлайн-распознавание речи
    :return: распознанная фраза
    """
    recognized_data = ""
    try:
        # проверка наличия модели на нужном языке в каталоге приложения
        if not os.path.exists("models/vosk-model-small-ru-0.4"):
            print("Please download the model from:\n"
                  "https://alphacephei.com/vosk/models and unpack as 'model' in the current folder.")
            exit(1)

        # анализ записанного в микрофон аудио (чтобы избежать повторов фразы)
        wave_audio_file = wave.open("microphone-results.wav", "rb")
        model = Model("models/vosk-model-small-ru-0.4")
        offline_recognizer = KaldiRecognizer(model, wave_audio_file.getframerate())

        data = wave_audio_file.readframes(wave_audio_file.getnframes())
        if len(data) > 0:
            if offline_recognizer.AcceptWaveform(data):
                recognized_data = offline_recognizer.Result()

                # получение данных распознанного текста из JSON-строки
                # (чтобы можно было выдать по ней ответ)
                recognized_data = json.loads(recognized_data)
                recognized_data = recognized_data["text"]
    except:
        print("Sorry, speech service is unavailable. Try again later")

    return recognized_data


def ask(request):
    instruction = """
    Ответь на запрос так, как ответил бы на него Павел Воля. Используй данные из биографии Павла Воли, если это потребуется. Отвечай на запрос в его стиле. Ответ должен содержать не болеее 10 предложений.
    """
    result = requests.post(
        url='https://llm.api.cloud.yandex.net/llm/v1alpha/instruct',
        headers={
            "Authorization": "Api-Key AQVNyVqBi-XoJ1cAo7VIxq6ztgXm3owqowtso5Qb",
        },
        json={
            "model": "general",
            "instruction_text": instruction,
            "request_text": request,
            "generation_options": {
                "max_tokens": 1500,
                "temperature": 0.5
            }
        }
    )
    data = json.loads(result.text)
    return(data['result']['alternatives'][0]['text'])


if __name__ == "__main__":
    # инициализация инструментов распознавания и ввода речи
    recognizer = speech_recognition.Recognizer()
    microphone = speech_recognition.Microphone()
    vg = VoiceGenerator()
    while True:
        # старт записи речи с последующим выводом распознанной речи
        # и удалением записанного в микрофон аудио
        voice_input = record_and_recognize_audio()
        os.remove("microphone-results.wav")
        print(voice_input)
        path_to_file = vg.generate(ask(voice_input))
        print(path_to_file)
        response = requests.post(
            "https://api.gooey.ai/v2/Lipsync/form/",
            headers={
                "Authorization": "Bearer " + k,
            },
            files=files,
            data={"json": json.dumps(payload)},
        )
        assert response.ok, response.content
        #song = AudioSegment.from_wav(path_to_file)
        result = response.json()
        print(response.status_code, result["output"]["output_video"])
        #play(song)
        urllib.request.urlretrieve(result["output"]["output_video"], "C:\\Users\\user\\Desktop\\deepfake_sirius\\materials\\video.mp4")
        os.startfile("C:\\Users\\user\\Desktop\\deepfake_sirius\\materials\\video.mp4")
        break;