Spaces:

easxtn
/

alexa-demo

Sleeping

App Files Files Community

easxtn committed on May 3

Commit

7157202

•

1 Parent(s): 0afe03e

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -14

app.py CHANGED Viewed

@@ -1,20 +1,52 @@
 import gradio as gr
-from transformers import pipeline
-speech_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
-#text_generation = pipeline("text-generation", model="microsoft/Phi-3-mini-128k-instruct", trust_remote_code = True)
-#text_to_speech = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs")
 def alexa(audio):
-    text = speech_to_text(audio)["text"]
-    #generate = text_generation(text, max_length=60)[0]["generated_text"]
-    #speech = text_to_speech(text)
-    #return (speech["sampling_rate"], speech["audio"][0])
     return text
 gr.Interface(
-    fn = alexa,
-    inputs = gr.Audio(type="filepath"),
-    #outputs = gr.Audio(label="Narration", type="numpy",  autoplay=True), live=True).launch()
-    outputs = gr.Textbox(), live=True).launch()

 import gradio as gr
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 def alexa(audio):
+    converted_text = speech_to_text(audio)
+    generated_text = text_generation(text)
+    speech = text_to_speech(generated_text)
+    return speech
+def speech_to_text(audio):
+    audio_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
+    if audio == None:
+        raise gr.Error("Please, submit an audio file")
+    else:
+        text = audio_to_text(audio)["text"]
     return text
+def text_generation(text):
+    torch.random.manual_seed(0)
+    model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-128k-instruct",
+    device_map="cuda",
+    torch_dtype="auto",
+    trust_remote_code=True, )
+    tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")
+    messages = [
+        {"role": "user", "content": text
+    ]
+    generation_args = {
+        "max_new_tokens": 500,
+        "return_full_text": False,
+        "temperature": 0.0,
+        "do_sample": False,
+    }
+    text_gen= pipeline("text-generation", model="microsoft/Phi-3-mini-128k-instruct", trust_remote_code = True)
+    response = text_gen(messages, **generation_args)
+    return response[0]["generated_text"]
+    def text_to_speech(text):
+    text_to_audio = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs")
+    narrated_text = text_to_audio(text)
+    return (narrated_text["sampling_rate"], narrated_text["audio"][0] )
 gr.Interface(
+  fn=alexa,
+  inputs=gr.Audio(type="filepath"),
+  outputs=[gr.Audio(label="Narration", type="numpy",  autoplay=True)], live=True).launch()