Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,7 @@ import os
|
|
8 |
import transformers
|
9 |
from transformers import pipeline, WhisperForConditionalGeneration, WhisperTokenizer, WhisperFeatureExtractor
|
10 |
import time
|
|
|
11 |
|
12 |
# def greet_from_secret(ignored_param):
|
13 |
# name = os.environ.get('TOKEN')
|
@@ -34,9 +35,16 @@ tokenizer3 = WhisperTokenizer.from_pretrained(M3, use_auth_token=auth_token)
|
|
34 |
feat_ext3 = WhisperFeatureExtractor.from_pretrained(M3, use_auth_token=auth_token)
|
35 |
|
36 |
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
p1 = pipeline('automatic-speech-recognition', model=model1, tokenizer=tokenizer1, feature_extractor=feat_ext1)
|
38 |
p2 = pipeline('automatic-speech-recognition', model=model2, tokenizer=tokenizer2, feature_extractor=feat_ext2)
|
39 |
p3 = pipeline('automatic-speech-recognition', model=model3, tokenizer=tokenizer3, feature_extractor=feat_ext3)
|
|
|
40 |
|
41 |
def transcribe(mic_input, upl_input, model_type):
|
42 |
if mic_input:
|
@@ -49,6 +57,8 @@ def transcribe(mic_input, upl_input, model_type):
|
|
49 |
text = p2(audio)["text"]
|
50 |
elif model_type == 'CleanFinetuned':
|
51 |
text = p3(audio)["text"]
|
|
|
|
|
52 |
else:
|
53 |
text = p1(audio)["text"]
|
54 |
end_time = time.time()
|
@@ -105,7 +115,7 @@ if __name__ == "__main__":
|
|
105 |
)
|
106 |
|
107 |
with gr.Row():
|
108 |
-
model_type = gr.inputs.Dropdown(["RobustDistillation", "NoisyFinetuned", "CleanFinetuned"], label='Model Type')
|
109 |
|
110 |
with gr.Row():
|
111 |
clr_btn = gr.Button(value="Clear", variant="secondary")
|
|
|
8 |
import transformers
|
9 |
from transformers import pipeline, WhisperForConditionalGeneration, WhisperTokenizer, WhisperFeatureExtractor
|
10 |
import time
|
11 |
+
import torch
|
12 |
|
13 |
# def greet_from_secret(ignored_param):
|
14 |
# name = os.environ.get('TOKEN')
|
|
|
35 |
feat_ext3 = WhisperFeatureExtractor.from_pretrained(M3, use_auth_token=auth_token)
|
36 |
|
37 |
|
38 |
# Build a dynamically-quantized copy of the distillation model: all
# torch.nn.Linear layers are converted to int8 weights for faster CPU
# inference (activations stay float; no calibration data needed).
# NOTE(review): the original quantized `model3` here, but the variable name,
# the tokenizer/feature-extractor pairing below, and the
# "NoisyDistillationQuantised" dropdown label all refer to model1 — quantizing
# model1 makes the block self-consistent; confirm against the model intent.
quantized_model1 = torch.quantization.quantize_dynamic(
    model1, {torch.nn.Linear}, dtype=torch.qint8
)

# One ASR pipeline per model variant; each pairs a model with its own
# tokenizer and feature extractor.
p1 = pipeline('automatic-speech-recognition', model=model1, tokenizer=tokenizer1, feature_extractor=feat_ext1)
p2 = pipeline('automatic-speech-recognition', model=model2, tokenizer=tokenizer2, feature_extractor=feat_ext2)
p3 = pipeline('automatic-speech-recognition', model=model3, tokenizer=tokenizer3, feature_extractor=feat_ext3)
# BUG FIX: the original passed the unquantized `model1` here, so
# `quantized_model1` was built but never used and the
# "NoisyDistillationQuantised" option served the full-precision model.
p1_quant = pipeline('automatic-speech-recognition', model=quantized_model1, tokenizer=tokenizer1, feature_extractor=feat_ext1)
|
48 |
|
49 |
def transcribe(mic_input, upl_input, model_type):
|
50 |
if mic_input:
|
|
|
57 |
text = p2(audio)["text"]
|
58 |
elif model_type == 'CleanFinetuned':
|
59 |
text = p3(audio)["text"]
|
60 |
+
elif model_type == 'NoisyDistillationQuantised':
|
61 |
+
text = p1_quant(audio)['text']
|
62 |
else:
|
63 |
text = p1(audio)["text"]
|
64 |
end_time = time.time()
|
|
|
115 |
)
|
116 |
|
117 |
with gr.Row():
|
118 |
+
model_type = gr.inputs.Dropdown(["RobustDistillation", "NoisyFinetuned", "CleanFinetuned", "NoisyDistillationQuantised"], label='Model Type')
|
119 |
|
120 |
with gr.Row():
|
121 |
clr_btn = gr.Button(value="Clear", variant="secondary")
|