Spaces:

easxtn
/

alexa-demo

Sleeping

easxtn committed on May 3

Commit

25ab1a9

•

1 Parent(s): 2456988

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,18 +9,14 @@ def alexa(audio):
     return speech
 def speech_to_text(audio):
-    audio_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
-    if audio == None:
-        raise gr.Error("Submit an audio file")
-    else:
-        text = audio_to_text(audio)["text"]
     return text
 def text_generation(text):
     model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-128k-instruct",
-    torch_dtype="auto",
-    trust_remote_code=True, )
-    tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")
     messages = [
         {"role": "user", "content": text}
@@ -33,8 +29,8 @@ def text_generation(text):
         "do_sample": True,
     }
-    text_gen= pipeline("text-generation", model="microsoft/Phi-3-mini-128k-instruct", tokenizer=tokenizer, trust_remote_code = True)
-    response = text_gen(messages, **generation_args)
     return response[0]["generated_text"]
 def text_to_speech(text):
@@ -45,4 +41,4 @@ def text_to_speech(text):
 gr.Interface(
   fn=alexa,
   inputs=gr.Audio(type="filepath"),
-  outputs=[gr.Audio(label="Audio", type="numpy",  autoplay=True)]).launch()

     return speech
 def speech_to_text(audio):
+    audio_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
+    text = audio_to_text(audio,generate_kwargs={"task": "transcribe", "language": "english"})["text"]
     return text
 def text_generation(text):
     model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-128k-instruct",
+    trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct"), trust_remote_code=True)
     messages = [
         {"role": "user", "content": text}
         "do_sample": True,
     }
+    text_gen= pipeline("text-generation", model=model, tokenizer=tokenizer, trust_remote_code = True)
+    response = text_gen(messages, generation_args)
     return response[0]["generated_text"]
 def text_to_speech(text):
 gr.Interface(
   fn=alexa,
   inputs=gr.Audio(type="filepath"),
+  outputs=[gr.Audio(label="Audio", type="numpy",  autoplay=False)]).launch()