easxtn committed
Commit 7157202
1 Parent(s): 0afe03e

Update app.py

Files changed (1)
  1. app.py +46 -14
app.py CHANGED
@@ -1,20 +1,52 @@
 import gradio as gr
-from transformers import pipeline
-
-
-speech_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
-#text_generation = pipeline("text-generation", model="microsoft/Phi-3-mini-128k-instruct", trust_remote_code = True)
-#text_to_speech = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs")
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
 
 def alexa(audio):
-    text = speech_to_text(audio)["text"]
-    #generate = text_generation(text, max_length=60)[0]["generated_text"]
-    #speech = text_to_speech(text)
-    #return (speech["sampling_rate"], speech["audio"][0])
+    converted_text = speech_to_text(audio)
+    generated_text = text_generation(converted_text)
+    speech = text_to_speech(generated_text)
+    return speech
+
+def speech_to_text(audio):
+    audio_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
+    if audio is None:
+        raise gr.Error("Please submit an audio file")
+    else:
+        text = audio_to_text(audio)["text"]
     return text
 
+def text_generation(text):
+    torch.random.manual_seed(0)
+
+    model = AutoModelForCausalLM.from_pretrained("microsoft/Phi-3-mini-128k-instruct",
+                                                 device_map="cuda",
+                                                 torch_dtype="auto",
+                                                 trust_remote_code=True)
+    tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")
+
+    messages = [
+        {"role": "user", "content": text}
+    ]
+
+    generation_args = {
+        "max_new_tokens": 500,
+        "return_full_text": False,
+        "temperature": 0.0,
+        "do_sample": False,
+    }
+
+    text_gen = pipeline("text-generation", model=model, tokenizer=tokenizer)
+    response = text_gen(messages, **generation_args)
+    return response[0]["generated_text"]
+
+def text_to_speech(text):
+    text_to_audio = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs")
+    narrated_text = text_to_audio(text)
+    return (narrated_text["sampling_rate"], narrated_text["audio"][0])
+
+
 gr.Interface(
-    fn = alexa,
-    inputs = gr.Audio(type="filepath"),
-    #outputs = gr.Audio(label="Narration", type="numpy", autoplay=True), live=True).launch()
-    outputs = gr.Textbox(), live=True).launch()
+    fn=alexa,
+    inputs=gr.Audio(type="filepath"),
+    outputs=[gr.Audio(label="Narration", type="numpy", autoplay=True)], live=True).launch()