Spaces:

robinhad
/

ukrainian-ai

Running

App Files Community

Yurii Paniv committed on Jan 28, 2023

Commit

eeaef84

•

1 Parent(s): 95bbd8f

Speed up demo

Browse files

Files changed (4) hide show

.gitignore +1 -1
README.md +3 -1
app.py +38 -46
requirements.txt +7 -7

.gitignore CHANGED Viewed

@@ -131,5 +131,5 @@ dmypy.json
 .DS_Store
 #models
-config.json
 *.pth

 .DS_Store
 #models
+config.yaml
 *.pth

README.md CHANGED Viewed

@@ -4,6 +4,8 @@ emoji: 🇺🇦
 colorFrom: blue
 colorTo: yellow
 sdk: gradio
 app_file: app.py
 pinned: false
 ---
@@ -17,7 +19,7 @@ Link to speaking demo: [https://huggingface.co/spaces/robinhad/ukrainian-ai](htt
 Link to text demo: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational)
 # Technologies used:
-- [Wav2Vec2 XLS-R 300M fine-tuned to Ukrainian language](https://huggingface.co/Yehor/wav2vec2-xls-r-300m-uk-with-small-lm) for speech recognition.
 - [Ukrainian VITS TTS](https://github.com/robinhad/ukrainian-tts) for text-to-speech generation.
 - Conversational pipeline (this repository)

 colorFrom: blue
 colorTo: yellow
 sdk: gradio
+sdk_version : 3.16
+python_version: 3.10
 app_file: app.py
 pinned: false
 ---
 Link to text demo: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational)
 # Technologies used:
+- [Wav2Vec2 XLS-R 300M fine-tuned to Ukrainian language](https://huggingface.co/robinhad/wav2vec2-xls-r-300m-uk) for speech recognition.
 - [Ukrainian VITS TTS](https://github.com/robinhad/ukrainian-tts) for text-to-speech generation.
 - Conversational pipeline (this repository)

app.py CHANGED Viewed

@@ -1,83 +1,75 @@
 import gradio as gr
-from transformers import Conversation, ConversationalPipeline, pipeline
 import tempfile
-import torch
-from os.path import exists
-import requests
-from TTS.utils.synthesizer import Synthesizer
 import gradio as gr
-def download(url, file_name):
-    if not exists(file_name):
-        print(f"Downloading {file_name}")
-        r = requests.get(url, allow_redirects=True)
-        with open(file_name, "wb") as file:
-            file.write(r.content)
-    else:
-        print(f"Found {file_name}. Skipping download...")
-print("downloading uk/mykyta/vits-tts")
-release_number = "v2.0.0-beta"
-model_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/model-inference.pth"
-config_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/config.json"
-model_path = "model.pth"
-config_path = "config.json"
-download(model_link, model_path)
-download(config_link, config_path)
 p = pipeline(
-    "automatic-speech-recognition", "Yehor/wav2vec2-xls-r-300m-uk-with-small-lm"
 )
 conv: ConversationalPipeline = pipeline(
-    "conversational", "robinhad/gpt2-uk-conversational"
 )
-synthesizer = Synthesizer(
-    model_path,
-    config_path,
-    None,
-    None,
-    None,
-)
-badge = (
-    "https://visitor-badge-reloaded.herokuapp.com/badge?page_id=robinhad.ukrainian-ai"
-)
-def transcribe(audio, history):
     text = p(audio)["text"]
     history = history or []
     past_user_inputs = [i[0] for i in history]
     generated_responses = [i[1] for i in history]
-    response = conv(Conversation(text, past_user_inputs, generated_responses))
     response = response.generated_responses[-1]
     history.append((text, response))
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
-        with torch.no_grad():
-            wavs = synthesizer.tts(response)
-            synthesizer.save_wav(wavs, fp)
         return text, fp.name, history, history
 iface = gr.Interface(
     fn=transcribe,
-    inputs=[gr.inputs.Audio(source="microphone", type="filepath"), "state"],
     outputs=[
         gr.outputs.Textbox(label="Recognized text"),
-        gr.outputs.Audio(label="Output"),
         gr.outputs.Chatbot(label="Chat"),
         "state",
     ],
     description="""Це альфа-версія end-to-end розмовного бота, з яким можна поспілкуватися голосом.
     Перейдіть сюди для доступу до текстової версії: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational)
     """,
-    article=f"""Розпізнавання української: [https://huggingface.co/Yehor/wav2vec2-xls-r-300m-uk-with-small-lm](https://huggingface.co/Yehor/wav2vec2-xls-r-300m-uk-with-small-lm)
-    Синтез української: [https://huggingface.co/spaces/robinhad/ukrainian-tts](https://huggingface.co/spaces/robinhad/ukrainian-tts)
-    <center><img src="{badge}" alt="visitors badge"/></center>""",
 )
 iface.launch()

 import gradio as gr
+from transformers import Conversation, ConversationalPipeline, pipeline, AlbertTokenizerFast
 import tempfile
 import gradio as gr
+from ukrainian_tts.tts import TTS, Voices, Stress
+from enum import Enum
+tts = TTS(device="cpu") # can try gpu, mps
 p = pipeline(
+    "automatic-speech-recognition", "robinhad/wav2vec2-xls-r-300m-uk"
 )
+tokenizer = AlbertTokenizerFast.from_pretrained("robinhad/gpt2-uk-conversational")
 conv: ConversationalPipeline = pipeline(
+    "conversational", "robinhad/gpt2-uk-conversational", tokenizer=tokenizer
 )
+class VoiceOption(Enum):
+    Olena = "Олена (жіночий) 👩"
+    Mykyta = "Микита (чоловічий) 👨"
+    Lada = "Лада (жіночий) 👩"
+    Dmytro = "Дмитро (чоловічий) 👨"
+    Olga = "Ольга (жіночий) 👩"
+voice_mapping = {
+    VoiceOption.Olena.value: Voices.Olena.value,
+    VoiceOption.Mykyta.value: Voices.Mykyta.value,
+    VoiceOption.Lada.value: Voices.Lada.value,
+    VoiceOption.Dmytro.value: Voices.Dmytro.value,
+    VoiceOption.Olga.value: Voices.Olga.value,
+}
+def transcribe(audio, selected_voice, history):
     text = p(audio)["text"]
     history = history or []
+    selected_voice = voice_mapping[selected_voice]
     past_user_inputs = [i[0] for i in history]
     generated_responses = [i[1] for i in history]
+    next_output_length = len(tokenizer.encode("".join(generated_responses + past_user_inputs))) + 60
+    response = conv(Conversation(text, past_user_inputs, generated_responses), max_length=next_output_length, penalty_alpha=0.6, top_k=4)
     response = response.generated_responses[-1]
     history.append((text, response))
     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
+        _, output_text = tts.tts(response, selected_voice, Stress.Dictionary.value, fp)
         return text, fp.name, history, history
 iface = gr.Interface(
     fn=transcribe,
+    inputs=[
+        gr.inputs.Audio(source="microphone", type="filepath"),
+        gr.components.Radio(
+            label="Голос",
+            choices=[option.value for option in VoiceOption],
+            value=VoiceOption.Olena.value,
+        ),
+        "state"],
     outputs=[
         gr.outputs.Textbox(label="Recognized text"),
+        gr.outputs.Audio(label="Output", type="filepath"),
         gr.outputs.Chatbot(label="Chat"),
         "state",
     ],
     description="""Це альфа-версія end-to-end розмовного бота, з яким можна поспілкуватися голосом.
     Перейдіть сюди для доступу до текстової версії: [https://huggingface.co/robinhad/gpt2-uk-conversational](https://huggingface.co/robinhad/gpt2-uk-conversational)
     """,
+    article=f"""Розпізнавання української: [https://huggingface.co/robinhad/wav2vec2-xls-r-300m-uk](https://huggingface.co/robinhad/wav2vec2-xls-r-300m-uk)
+    Синтез української: [https://huggingface.co/spaces/robinhad/ukrainian-tts](https://huggingface.co/spaces/robinhad/ukrainian-tts)""",
 )
 iface.launch()

requirements.txt CHANGED Viewed

@@ -1,7 +1,7 @@
-gradio
-transformers==4.19.4
-TTS==0.6.2
-torch
-pyctcdecode
-https://github.com/kpu/kenlm/archive/master.zip
-sentencepiece==0.1.96

+gradio==3.16
+transformers==4.26
+git+https://github.com/robinhad/ukrainian-tts.git@d3459a5e8a78dd95bfd1b43a4a659637a12a61d7
+# these would be needed if the model had an LM
+#pyctcdecode
+#https://github.com/kpu/kenlm/archive/master.zip
+#sentencepiece==0.1.96