nata0801 committed on
Commit
e80ec97
1 Parent(s): d493028

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -1
app.py CHANGED
@@ -6,4 +6,52 @@ import warnings
6
 
7
  from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForCTC
8
 
9
- warnings.filterwarnings("ignore")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  from transformers import Wav2Vec2Tokenizer, Wav2Vec2ForCTC
8
 
9
+ warnings.filterwarnings("ignore")
10
+
11
+ #load wav2vec2 tokenizer and model
12
+ # define speech-to-text function
13
def asr_transcript(audio_file, language):
    """Transcribe an audio file with a language-specific Wav2Vec2 model.

    Args:
        audio_file: Object whose ``name`` attribute is the path of the audio
            file on disk.
        language: One of ``"English"``, ``"Russian"`` or ``"French"``.

    Returns:
        The lower-cased transcript. Each decoded block is followed by a
        single space, so a non-empty result ends with a trailing space.

    Raises:
        ValueError: If ``language`` is not a supported choice (this includes
            ``None``).
    """
    model_names = {
        "English": "facebook/wav2vec2-large-960h-lv60-self",
        "Russian": "jonatasgrosman/wav2vec2-large-xlsr-53-russian",
        "French": "jonatasgrosman/wav2vec2-large-xlsr-53-french",
    }

    # Validate before any expensive work; previously an unknown language
    # fell through the if/elif chain and left ``model_name`` unbound.
    model_name = model_names.get(language)
    if model_name is None:
        supported = ", ".join(model_names)
        raise ValueError(
            f"Unsupported language: {language!r}; expected one of {supported}"
        )

    tokenizer = Wav2Vec2Tokenizer.from_pretrained(model_name)
    model = Wav2Vec2ForCTC.from_pretrained(model_name)

    # Each block is 20 non-overlapping frames of 16000 samples.
    # NOTE(review): that is 20 seconds only if the file is sampled at 16 kHz;
    # librosa.stream reads at the file's native rate -- confirm inputs.
    stream = librosa.stream(
        audio_file.name, block_length=20, frame_length=16000, hop_length=16000
    )

    pieces = []
    # Inference only: no gradients are needed, so skip autograd bookkeeping.
    with torch.no_grad():
        for speech in stream:
            # NOTE(review): librosa.stream defaults to mono output, so this
            # multi-channel branch is presumably never taken -- confirm
            # before relying on it.
            if len(speech.shape) > 1:
                speech = speech[:, 0] + speech[:, 1]

            input_values = tokenizer(speech, return_tensors="pt").input_values
            logits = model(input_values).logits

            # Greedy decoding: take the highest-scoring token at each step.
            predicted_ids = torch.argmax(logits, dim=-1)
            transcription = tokenizer.batch_decode(predicted_ids)[0]
            pieces.append(transcription.lower() + " ")

    return "".join(pieces)
44
+
45
# Input widgets: the uploaded audio file and the language selector.
audio_input = gr.inputs.Audio(label="Upload Audio File", type="file")
language_input = gr.inputs.Radio(
    label="Pick an STT Model - (language)",
    choices=["English", "Russian", "French"],
)

# Output widget that displays the returned transcript.
transcript_output = gr.outputs.Textbox(label="Auto-Transcript")

# Wire the widgets to the speech-to-text function.
gradio_ui = gr.Interface(
    fn=asr_transcript,
    title="Speech-to-Text with HuggingFace+Wav2Vec2",
    description="Upload an audio clip in Russian, English, or French and let AI do the hard work of transcribing",
    inputs=[audio_input, language_input],
    outputs=transcript_output,
)

# Start the web UI.
gradio_ui.launch()