Spaces:

reach-vb
/

asr-pyctcdecode

Runtime error

Vaibhav Srivastav committed on Jan 12, 2022

Commit

379fa33

1 Parent(s): 3b8d409

adding greedy decoding

Files changed (1) hide show

app.py CHANGED Viewed

@@ -22,8 +22,7 @@ def load_and_fix_data(input_file):
   if sample_rate !=16000:
     speech = librosa.resample(speech, sample_rate,16000)
   return speech
 def fix_transcription_casing(input_sentence):
   """Upper-case the first character of every sentence in ``input_sentence``.

   The text is split into sentences with ``nltk.sent_tokenize``; the
   adjusted sentences are joined back together with single spaces.
   """
   cased_sentences = []
   for sentence in nltk.sent_tokenize(input_sentence):
     first_char = sentence[0]
     # Only the first occurrence is replaced, i.e. the leading character.
     cased_sentences.append(sentence.replace(first_char, first_char.capitalize(), 1))
   return ' '.join(cased_sentences)
@@ -41,10 +40,27 @@ def predict_and_decode(input_file):
   transcribed_text = fix_transcription_casing(pred.lower())
   return transcribed_text
 gr.Interface(predict_and_decode,
              inputs = gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Record/ Drop audio"),
-             outputs = gr.outputs.Textbox(label="Output Text"),
              title="ASR using Wav2Vec 2.0 & pyctcdecode",
              description = "Extending HF ASR models with pyctcdecode decoder",
              layout = "horizontal",

   if sample_rate !=16000:
     speech = librosa.resample(speech, sample_rate,16000)
   return speech
 def fix_transcription_casing(input_sentence):
   """Upper-case the first character of every sentence in ``input_sentence``.

   The text is split into sentences with ``nltk.sent_tokenize``; the
   adjusted sentences are joined back together with single spaces.
   """
   cased_sentences = []
   for sentence in nltk.sent_tokenize(input_sentence):
     first_char = sentence[0]
     # Only the first occurrence is replaced, i.e. the leading character.
     cased_sentences.append(sentence.replace(first_char, first_char.capitalize(), 1))
   return ' '.join(cased_sentences)
   transcribed_text = fix_transcription_casing(pred.lower())
   return transcribed_text
+def predict_and_greedy_decode(input_file):
+  speech = load_and_fix_data(input_file)
+  input_values = processor(speech, return_tensors="pt", sampling_rate=16000).input_values
+  logits = model(input_values).logits
+  predicted_ids = torch.argmax(logits, dim=-1)
+  pred = processor.batch_decode(predicted_ids)
+  transcribed_text = fix_transcription_casing(pred.lower())
+  return transcribed_text
+def return_all_predictions(input_file):
+  return predict_and_decode(input_file), predict_and_greedy_decode(input_file)
 gr.Interface(predict_and_decode,
              inputs = gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Record/ Drop audio"),
+             outputs = [gr.outputs.Textbox(label="Beam CTC Decoding"), gr.outputs.Textbox(label="Greedy Decoding")],
              title="ASR using Wav2Vec 2.0 & pyctcdecode",
              description = "Extending HF ASR models with pyctcdecode decoder",
              layout = "horizontal",