Spaces:
Runtime error
Runtime error
File size: 5,060 Bytes
3824a4a 9617099 bb7c6aa e982bb1 65fbe83 e982bb1 5e62e0a 65fbe83 9617099 3824a4a 74b879d b3056e9 0b22c4c 4e6667e 0b22c4c bb900d4 934fb75 bb900d4 74b879d 4d66286 1cd7901 4d66286 74b879d 1cd7901 4d66286 1cd7901 4d66286 d6e241d 4d66286 0b22c4c 4d66286 0b22c4c 3b2f7a4 4d66286 1cd7901 4d66286 bdaa837 4d66286 3b2f7a4 f08f95f 4d66286 d0825e8 2db42f8 08a9183 0b22c4c 08a9183 2db42f8 08a9183 64778ee 604b0e3 cb258da 64778ee 55ba9e5 d3b5b71 55ba9e5 eb8c58a d3b5b71 04beede d3b5b71 e2fb64b ed7d6fc d3b5b71 eab9bbc eb8c58a 55ba9e5 cb258da 8583fc2 cb258da 8583fc2 44d5308 cb258da 64778ee d3b5b71 64778ee eb8c58a cb258da 6c15fe9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import gradio as gr
import os
import wave
import json
import requests
import urllib.request
import speech_recognition
from vosk_tts import Model, Synth
from scipy.io.wavfile import write
from pydub import AudioSegment
from pydub.playback import play
# Filesystem locations, resolved against the current working directory.
PATH_TO_MODEL = os.path.join(os.getcwd(), "vosk-model-tts-ru-0.4-multi")
PATH_TO_OUTPUT = os.path.join(os.getcwd(), "content")

# Credentials are read from the environment so that no secret lives in the
# source tree. "NONE" is the placeholder used when a variable is not set.
# NOTE(review): the Lipsync key that used to be hard-coded here has been
# published with the source and should be revoked and re-issued.
LIPSYNC_KEY = os.environ.get("LIPSYNC_KEY", "NONE")
YA_GPT_KEY = os.environ.get("YA_GPT_KEY", "NONE")
IAM_TOKEN = os.environ.get("IAM_TOKEN", "NONE")
X_FOLDER_ID = os.environ.get("X_FOLDER_ID", "NONE")

# Instruction text sent as "instruction_text" with every LLM request.
instruction = """
Ответь на запрос так, как ответил бы на него Павел Воля. Используй данные из биографии Павла Воли, если это потребуется. Отвечай на запрос в его стиле. Ответ должен содержать не болеее 10 предложений. Все цифры пиши словами.
"""
class VoiceGenerator:
    """Text-to-speech helper built on the vosk_tts model at PATH_TO_MODEL."""

    def __init__(self):
        # The model is loaded once here; generate() creates a Synth per call.
        self.model = Model(PATH_TO_MODEL)

    def generate(self, text, file_name='output.wav'):
        """Synthesize *text* to PATH_TO_OUTPUT/<file_name> and return the path."""
        target_path = os.path.join(PATH_TO_OUTPUT, file_name)
        Synth(self.model).synth(text, target_path)
        return target_path
def recognize_audio(file_path):
    """Transcribe the audio file at *file_path* with Google speech recognition.

    Returns the lower-cased Russian transcript. When recognition fails, the
    returned text is a fixed error message followed by the name of the
    exception that occurred. The result is also printed.
    """
    failure_text = "Ошибка распознавания речи"

    with speech_recognition.AudioFile(file_path) as audio_file:
        captured = recognizer.record(audio_file)

    try:
        transcript = recognizer.recognize_google(captured, language="ru").lower()
    except speech_recognition.UnknownValueError:
        transcript = failure_text + ' UnknownValueError'
    except speech_recognition.RequestError:
        transcript = failure_text + ' RequestError'

    print(transcript)
    return transcript
def ask_gpt(request):
    """Send *request* to the Yandex LLM instruct endpoint and return its answer.

    The module-level ``instruction`` is sent as the instruction text, and
    ``IAM_TOKEN`` / ``X_FOLDER_ID`` are sent as request headers.

    Returns the text of the first alternative in the response. If the service
    cannot be reached, times out, answers with something that is not JSON, or
    answers with JSON of an unexpected shape, a fixed fallback phrase is
    returned instead of raising.
    """
    fallback = 'Я пока не могу ответить на ваш вопрос. Все мои мозги на сервере, а он не отвечает.'

    try:
        result = requests.post(
            url='https://llm.api.cloud.yandex.net/llm/v1alpha/instruct',
            headers={'Authorization': f'Bearer {IAM_TOKEN}', 'x-folder-id': X_FOLDER_ID},
            json={
                "model": "general",
                "instruction_text": instruction,
                "request_text": request,
                "generation_options": {
                    "max_tokens": 1500,
                    "temperature": 0.5,
                },
            },
            # Without a timeout a stalled server would block the UI forever.
            timeout=60,
        )
        # json.JSONDecodeError is a ValueError subclass, handled below.
        data = json.loads(result.text)
    except (requests.RequestException, ValueError) as error:
        print(error)
        return fallback

    print(data)
    try:
        return data['result']['alternatives'][0]['text']
    except (KeyError, IndexError, TypeError):
        # Error payloads and unexpected response shapes end up here.
        return fallback
# Shared speech recognizer used by recognize_audio().
recognizer = speech_recognition.Recognizer()
# Shared voice generator; constructing it loads the TTS model from PATH_TO_MODEL.
vg = VoiceGenerator()
def get_video(pathToWav, pathToImage, pathToResult):
    """Request a lip-synced video from the Gooey.ai Lipsync API and download it.

    Args:
        pathToWav: path of the audio file uploaded as ``input_audio``.
        pathToImage: path of the face image uploaded as ``input_face``.
        pathToResult: local path the resulting video is downloaded to.

    Raises:
        RuntimeError: if the API answers with a non-success status; the
            response body is attached to the exception.
    """
    payload = {}

    # Open both uploads in a context manager so the handles are always
    # closed, even when the request itself raises.
    with open(pathToImage, "rb") as face_file, open(pathToWav, "rb") as audio_file:
        response = requests.post(
            "https://api.gooey.ai/v2/Lipsync/form/",
            headers={
                "Authorization": "Bearer " + LIPSYNC_KEY,
            },
            files=[
                ("input_face", face_file),
                ("input_audio", audio_file),
            ],
            data={"json": json.dumps(payload)},
        )

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if not response.ok:
        raise RuntimeError(response.content)

    result = response.json()
    urllib.request.urlretrieve(result["output"]["output_video"], pathToResult)
def resultSay(text):
    """Voice *text* and return the path of a lip-synced video of it.

    The face image is PATH_TO_OUTPUT/reference.jpg and the video is written
    to PATH_TO_OUTPUT/video.mp4.
    """
    face_path = os.path.join(PATH_TO_OUTPUT, 'reference.jpg')
    video_path = os.path.join(PATH_TO_OUTPUT, 'video.mp4')
    speech_path = vg.generate(text)
    get_video(speech_path, face_path, video_path)
    return video_path
def resultText(text):
    """Ask the LLM about *text* and return the path of a video voicing its answer."""
    answer = ask_gpt(text)
    return resultSay(answer)
def resultAudio(audio):
    """Answer a spoken question with a video.

    *audio* is unpacked as a ``(sample_rate, data)`` pair. The samples are
    written to PATH_TO_OUTPUT/voice_input.wav, transcribed, and the
    transcript is passed to resultText().
    """
    rate, samples = audio
    wav_path = os.path.join(PATH_TO_OUTPUT, 'voice_input.wav')
    write(wav_path, rate, samples)
    question = recognize_audio(wav_path)
    return resultText(question)
def resultOnlyAudio(text):
    """Synthesize *text* to speech and return the path of the audio file."""
    audio_path = vg.generate(text)
    return audio_path
def resultCustom(text, photo):
    """Voice *text* verbatim and return the path of a lip-synced video of it.

    Args:
        text: the text to synthesize.
        photo: optional image object with a ``save(path)`` method. When
            given, it is saved as PATH_TO_OUTPUT/custom.jpg and used as the
            face; otherwise PATH_TO_OUTPUT/reference.jpg is used.

    Returns:
        Path of the generated video, PATH_TO_OUTPUT/video.mp4.
    """
    print(text, photo)
    generated_audio = vg.generate(text)

    referenceName = 'reference.jpg'
    # Identity check: `!= None` would go through the object's own __ne__.
    if photo is not None:
        referenceName = 'custom.jpg'
        photo.save(os.path.join(PATH_TO_OUTPUT, referenceName))

    resultName = 'video.mp4'
    pathToReference = os.path.join(PATH_TO_OUTPUT, referenceName)
    pathToResult = os.path.join(PATH_TO_OUTPUT, resultName)
    get_video(generated_audio, pathToReference, pathToResult)
    return pathToResult
# Tab 1: spoken question in, answer video out.
demoMic = gr.Interface(
    fn=resultAudio,
    inputs=gr.Audio(sources=["microphone"]),
    outputs="playable_video",
)

# Tab 2: typed question in, answer video out.
demoText = gr.Interface(
    fn=resultText,
    inputs=gr.Textbox(label="Query"),
    outputs="playable_video",
)

# Tab 3: text plus an optional face image in, video of that text out.
demoSay = gr.Interface(
    fn=resultCustom,
    inputs=[gr.Textbox(label="Query"), gr.Image(type="pil")],
    outputs="playable_video",
)

# Tab 4: text in, synthesized audio out.
demoOnly = gr.Interface(
    fn=resultOnlyAudio,
    inputs=gr.Textbox(label="Query"),
    outputs="audio",
)

# Combine the four interfaces into one tabbed app and start it.
demo = gr.TabbedInterface(
    interface_list=[demoMic, demoText, demoSay, demoOnly],
    tab_names=["Ask by micro", "Ask by text", "Generate video", "Say this text"],
)
demo.launch()