Samuel L Meyers committed on
Commit
44742a9
1 Parent(s): dd6b086

Test huggingface

Browse files
Files changed (1) hide show
  1. app.py +18 -12
app.py CHANGED
@@ -1,20 +1,26 @@
1
  import gradio as gr
2
  import torch
3
  import scipy.io.wavfile as wavfile
4
- from transformers import AutoProcessor, SeamlessM4TModel
5
 
6
- tokenizer = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")
7
- model = SeamlessM4TModel.from_pretrained("facebook/hf-seamless-m4t-medium")
8
 
9
- text = "some example text in the English language"
10
 
11
- def greet(text):
12
- inputs = tokenizer(text, return_tensors="pt")
13
- with torch.no_grad():
14
- output = model(**inputs, decoder_input_ids=inputs["input_ids"]).waveform
15
- out = output[0]
16
- wavfile.write("tmp.wav", rate=16000, data=out)
17
- return open("tmp.wav", "rb").read()
18
 
19
- iface = gr.Interface(fn=greet, inputs="text", outputs="audio")
 
 
 
 
 
 
20
  iface.launch()
 
1
  import gradio as gr
2
  import torch
3
  import scipy.io.wavfile as wavfile
4
+ from transformers import AutoProcessor, SeamlessM4TModel, pipeline
5
 
6
+ # tokenizer = AutoProcessor.from_pretrained("facebook/hf-seamless-m4t-medium")
7
+ # model = SeamlessM4TModel.from_pretrained("facebook/hf-seamless-m4t-medium")
8
 
9
+ # text = "some example text in the English language"
10
 
11
+ # def greet(text):
12
+ # inputs = tokenizer(text, return_tensors="pt")
13
+ # with torch.no_grad():
14
+ # output = model(**inputs, decoder_input_ids=inputs["input_ids"]).waveform
15
+ # out = output[0]
16
+ # wavfile.write("tmp.wav", rate=16000, data=out)
17
+ # return open("tmp.wav", "rb").read()
18
 
19
_ASR_MODEL_ID = "facebook/hubert-large-ls960-ft"
_asr_pipeline = None  # built on first use, then shared by every request


def _get_asr_pipeline():
    """Return the shared speech-recognition pipeline, loading it on first use."""
    global _asr_pipeline
    if _asr_pipeline is None:
        # Loading the checkpoint is expensive; doing it per request (as the
        # previous version did) makes every transcription pay that cost.
        _asr_pipeline = pipeline("automatic-speech-recognition", model=_ASR_MODEL_ID)
    return _asr_pipeline


def _to_mono_float32(samples):
    """Convert raw audio samples into a 1-D float32 waveform."""
    import numpy as np  # local import: numpy is not imported at module level here

    waveform = np.asarray(samples)
    is_integer_pcm = np.issubdtype(waveform.dtype, np.integer)
    # Capture the full-scale value before any dtype change below.
    full_scale = float(np.iinfo(waveform.dtype).max) if is_integer_pcm else 1.0
    if waveform.ndim > 1:
        # NOTE(review): assumes a (samples, channels) layout, which is what
        # Gradio's numpy audio format is documented to produce -- confirm.
        waveform = waveform.mean(axis=1)
    waveform = waveform.astype(np.float32)
    if is_integer_pcm:
        # Scale integer PCM into roughly [-1.0, 1.0].
        waveform /= full_scale
    return waveform


def stt(audio):
    """Transcribe a recorded or uploaded audio clip to text.

    Args:
        audio: A ``(sample_rate, samples)`` pair as delivered by the Gradio
            ``"audio"`` input, or ``None`` when nothing was recorded.

    Returns:
        The transcription as a string; an empty string when there is no
        audio to transcribe.
    """
    if audio is None:
        return ""

    sample_rate, samples = audio
    waveform = _to_mono_float32(samples)
    if waveform.size == 0:
        return ""

    # Passing the sampling rate alongside the samples lets the pipeline
    # resample to the rate the model expects instead of silently
    # misinterpreting the audio. NOTE(review): the pipeline's resampling path
    # is expected to need torchaudio when the rates differ -- confirm it is
    # installed in the deployment environment.
    result = _get_asr_pipeline()({"sampling_rate": sample_rate, "raw": waveform})
    # The pipeline returns a dict; the "text" output component wants the string.
    return result["text"]
24
+
25
# Expose the transcription callback through a minimal audio-in / text-out
# web UI and start serving it.
iface = gr.Interface(
    fn=stt,
    inputs="audio",
    outputs="text",
)
iface.launch()