hindi-speech-recognition-veda_intern-wav2vec2

Runtime error

Harveenchadha committed on Feb 2, 2023

Commit

4f9fa63

•

1 Parent(s): 0b02e4c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -21,10 +21,26 @@ def read_file(wav):
     return resampled_signal
-def parse_transcription(wav_file):
     filename = wav_file.split('.')[0]
     convert(wav_file, filename + "16k.wav")
     speech, _ = sf.read(filename + "16k.wav")
     #speech = read_file(wav_file)
@@ -36,9 +52,10 @@ def parse_transcription(wav_file):
     transcription = processor.decode(predicted_ids[0], skip_special_tokens=True)
     return transcription
-processor = Wav2Vec2Processor.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
-model = Wav2Vec2ForCTC.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
@@ -49,6 +66,8 @@ txtbox = gr.Textbox(
             lines=5
         )
-gr.Interface(parse_transcription, inputs = input_,  outputs=txtbox,
              streaming=True, interactive=True,
              analytics_enabled=False, show_tips=False, enable_queue=True).launch(inline=False);

     return resampled_signal
+def parse_transcription_with_lm(wav_file):
+    """Transcribe ``wav_file`` using the language-model-backed decoder.
+
+    The samples returned by ``convert_file`` are fed to the CTC model and the
+    resulting logits are decoded by ``processor_with_LM``.
+
+    Args:
+        wav_file: Path of the audio file to transcribe.
+
+    Returns:
+        The decoded text of the single submitted utterance.
+    """
+    speech = convert_file(wav_file)
+    # Feed the converted samples directly; there is no ``batch`` in this scope.
+    inputs = processor_with_LM(speech, sampling_rate=16_000, return_tensors="pt", padding=True)
+    with torch.no_grad():
+        logits = model(**inputs).logits
+    # The LM processor decodes raw logits and returns an object exposing ``.text``.
+    int_result = processor_with_LM.batch_decode(logits.cpu().numpy())
+    # ``.text`` holds one string per batch item; exactly one item was submitted.
+    transcription = int_result.text[0]
+    return transcription
+def convert_file(wav_file):
+    # Run ``convert`` on ``wav_file`` to produce "<base>16k.wav", then return the
+    # samples that ``sf.read`` loads from that converted file.
+    # NOTE(review): ``split('.')[0]`` keeps only the text before the FIRST dot in
+    # the whole path, so a dot in a directory name truncates the base name.
     filename = wav_file.split('.')[0]
     convert(wav_file, filename + "16k.wav")
+    # The second value returned by ``sf.read`` (presumably the sample rate) is discarded.
     speech, _ = sf.read(filename + "16k.wav")
+    return speech
+def parse_transcription(wav_file):
+    speech = convert_file(wav_file)
     #speech = read_file(wav_file)
     transcription = processor.decode(predicted_ids[0], skip_special_tokens=True)
     return transcription
+# Checkpoint identifier shared by the three ``from_pretrained`` calls below.
+model_id = "Harveenchadha/vakyansh-wav2vec2-hindi-him-4200"
+# Plain processor; ``parse_transcription`` uses it to decode predicted ids.
+processor = Wav2Vec2Processor.from_pretrained(model_id)
+# LM-enabled processor loaded from the same identifier.
+processor_with_LM = Wav2Vec2ProcessorWithLM.from_pretrained(model_id)
+model = Wav2Vec2ForCTC.from_pretrained(model_id)
             lines=5
         )
+chkbox = gr.Checkbox(label="Apply LM", value=False)
+def _transcribe(wav_file, apply_lm):
+    """Route the request to the LM decoder when the checkbox is ticked."""
+    if apply_lm:
+        return parse_transcription_with_lm(wav_file)
+    return parse_transcription(wav_file)
+# Two input components mean the callback receives two arguments, so the
+# one-argument ``parse_transcription`` cannot be passed directly.
+gr.Interface(_transcribe, inputs = [input_, chkbox],  outputs=txtbox,
              streaming=True, interactive=True,
              analytics_enabled=False, show_tips=False, enable_queue=True).launch(inline=False);