OpenAI_Whisper_ASR

Runtime error

App Files Files Community

mmichelli

mrm8488 committed on Dec 9, 2022

Commit

34f21a1

0 Parent(s):

Duplicate from mrm8488/OpenAI_Whisper_ASR

Browse files

Co-authored-by: Manuel Romero <mrm8488@users.noreply.huggingface.co>

Files changed (4) hide show

.gitattributes +31 -0
README.md +14 -0
app.py +144 -0
requirements.txt +2 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,31 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,14 @@

+---
+title: OpenAI Whisper ASR
+emoji: 🗣️🔤
+colorFrom: purple
+colorTo: red
+sdk: gradio
+sdk_version: 3.3.1
+app_file: app.py
+pinned: false
+license: bigscience-bloom-rail-1.0
+duplicated_from: mrm8488/OpenAI_Whisper_ASR
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,144 @@

+import gradio as gr
+import whisper
+# Load the "base" Whisper checkpoint once at import time; inference() reuses
+# this module-level model for every request instead of reloading it.
+model = whisper.load_model("base")
+def inference(audio):
+    """Detect the spoken language of a recording and transcribe it.
+
+    Args:
+        audio: Path to the recorded audio file (the Audio input is declared
+            with type="filepath"). May be None or empty when the button is
+            clicked before anything has been recorded.
+
+    Returns:
+        A (language, text) tuple: the upper-cased code of the most probable
+        language and the decoded text. Both are empty strings when no audio
+        was supplied.
+    """
+    # Without this guard a click with no recording passes no path and
+    # whisper.load_audio() fails instead of returning cleanly.
+    if not audio:
+        return "", ""
+    audio = whisper.load_audio(audio)
+    # Pad or cut the waveform to the fixed length the model expects.
+    audio = whisper.pad_or_trim(audio)
+    mel = whisper.log_mel_spectrogram(audio).to(model.device)
+    _, probs = model.detect_language(mel)
+    lang = max(probs, key=probs.get)
+    # Hand the detected language to the decoder so the reported language and
+    # the one used for decoding always agree, and so decode() does not have
+    # to identify it again. fp16 stays disabled as before.
+    options = whisper.DecodingOptions(language=lang, fp16=False)
+    result = whisper.decode(model, mel, options)
+    return lang.upper(), result.text
+# NOTE(review): title and description are not referenced by the Blocks layout
+# in this file (the header HTML repeats the description text inline) --
+# confirm whether they are still needed.
+title="Open AI Whisper"
+description="Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification."
+# Custom stylesheet handed to gr.Blocks(css=...): button colours, container
+# width, focus ring and footer styling, with .dark overrides.
+css = """
+        .gradio-container {
+            font-family: 'IBM Plex Sans', sans-serif;
+        }
+        .gr-button {
+            color: white;
+            border-color: black;
+            background: black;
+        }
+        input[type='range'] {
+            accent-color: black;
+        }
+        .dark input[type='range'] {
+            accent-color: #dfdfdf;
+        }
+        .container {
+            max-width: 730px;
+            margin: auto;
+            padding-top: 1.5rem;
+        }
+        .details:hover {
+            text-decoration: underline;
+        }
+        .gr-button {
+            white-space: nowrap;
+        }
+        .gr-button:focus {
+            border-color: rgb(147 197 253 / var(--tw-border-opacity));
+            outline: none;
+            box-shadow: var(--tw-ring-offset-shadow), var(--tw-ring-shadow), var(--tw-shadow, 0 0 #0000);
+            --tw-border-opacity: 1;
+            --tw-ring-offset-shadow: var(--tw-ring-inset) 0 0 0 var(--tw-ring-offset-width) var(--tw-ring-offset-color);
+            --tw-ring-shadow: var(--tw-ring-inset) 0 0 0 calc(3px + var(--tw-ring-offset-width)) var(--tw-ring-color);
+            --tw-ring-color: rgb(191 219 254 / var(--tw-ring-opacity));
+            --tw-ring-opacity: .5;
+        }
+        .footer {
+            margin-bottom: 45px;
+            margin-top: 35px;
+            text-align: center;
+            border-bottom: 1px solid #e5e5e5;
+        }
+        .footer>p {
+            font-size: .8rem;
+            display: inline-block;
+            padding: 0 10px;
+            transform: translateY(10px);
+            background: white;
+        }
+        .dark .footer {
+            border-color: #303030;
+        }
+        .dark .footer>p {
+            background: #0b0f19;
+        }
+        .prompt h4{
+            margin: 1.25em 0 .25em 0;
+            font-weight: bold;
+            font-size: 115%;
+        }
+"""
+# Build the UI: a header, a microphone input with a Transcribe button, two
+# output textboxes (language and transcription) and a footer.
+block = gr.Blocks(css=css)
+with block:
+    gr.HTML(
+        """
+            <div style="text-align: center; max-width: 650px; margin: 0 auto;">
+              <div
+                style="
+                  display: inline-flex;
+                  align-items: center;
+                  gap: 0.8rem;
+                  font-size: 1.75rem;
+                "
+              >
+                <h1 style="font-weight: 900; margin-bottom: 7px;">
+                  OpenAI Whisper
+                </h1>
+              </div>
+              <p style="margin-bottom: 10px; font-size: 94%">
+                Whisper is a general-purpose speech recognition model. It is trained on a large dataset of diverse audio and is also a multi-task model that can perform multilingual speech recognition as well as speech translation and language identification.
+              </p>
+            </div>
+        """
+    )
+    with gr.Group():
+        with gr.Box():
+            with gr.Row().style(mobile_collapse=False, equal_height=True):
+                # type="filepath" means inference() receives a path to the
+                # recording rather than raw samples.
+                audio = gr.Audio(
+                    label="Input Audio",
+                    show_label=False,
+                    source="microphone",
+                    type="filepath"
+                )
+                btn = gr.Button("Transcribe")
+        lang_str = gr.Textbox(label="language")
+        text = gr.Textbox(label="Transcription")
+        # inference() returns (language, text); the outputs list maps them to
+        # the two textboxes in that order.
+        btn.click(inference, inputs=[audio], outputs=[lang_str,text])
+        gr.HTML('''
+        <div class="footer">
+                    <p>Model by <a href="https://github.com/openai/whisper" style="text-decoration: underline;" target="_blank">OpenAI</a>
+                    </p>
+        </div>
+        ''')
+# Launch the app. This call sits at module level, so it also runs on import.
+block.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ transformers
2	+ git+https://github.com/openai/whisper.git