mireiafarrus committed
Commit
9fe9663
1 Parent(s): 87308cb

Upload 6 files

app.py ADDED
@@ -0,0 +1,68 @@
+ from transformers import pipeline
+ import gradio as gr
+ import torch
+
+ from examples import infer_from_audio_examples
+
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ asr = pipeline(
+     "automatic-speech-recognition",
+     model="MaximilianChen/Casper",
+     chunk_length_s=30,
+     device=device,
+ )
+
+
+ def transcribe_audio(file=None, mic=None):
+     if mic is not None:
+         audio = mic
+     elif file is not None:
+         audio = file
+     else:
+         return "You must provide either a mic recording or a file"
+     transcription = asr(audio)["text"]
+     return transcription
+
+
+ # css=".gradio-container {background: url('file=background_images/wallpaper_test_mod_2.jpg')}"
+ with gr.Blocks() as demo:
+
+     gr.Markdown("<center><h1>CASPER</h1> "
+                 "<h2>Catalan Automatic Speech Recognition using Fine-Tuned Whisper</h2></center>")
+
+     with gr.Row():
+         with gr.Column():
+             audio_from_microphone = gr.Audio(source="microphone", label="Mic", type="filepath")
+             audio_from_file = gr.Audio(source="upload", label="File", type="filepath")
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     asr_btn = gr.Button("Transcribe!")
+                 with gr.Column(scale=0):
+                     cln_btn = gr.ClearButton(value='Clear', components=[audio_from_microphone, audio_from_file])
+         with gr.Column():
+             output_text = gr.Textbox(label="Generated Transcription")
+             del_text = gr.ClearButton(value='Delete Text', components=[output_text])
+             gr.Markdown("<sub>NOTE: This model does not generate punctuation or casing.</sub>")
+
+     asr_btn.click(fn=transcribe_audio,
+                   inputs=[audio_from_file, audio_from_microphone],
+                   outputs=output_text)
+
+     with gr.Row():
+         with gr.Column():
+             gr.Markdown("### Audio Examples")
+             gr.Examples(examples=infer_from_audio_examples,
+                         label="From the Catalan Google TTS dataset",
+                         inputs=[audio_from_file, audio_from_microphone],
+                         outputs=output_text,
+                         fn=transcribe_audio,
+                         cache_examples=True)
+             gr.Markdown("### More Details")
+             gr.Markdown("The model is a fine-tuned small version of the Whisper architecture. "
+                         "More details about the base model are available at this [link](https://huggingface.co/openai/whisper-small). <br>"
+                         "Whisper has been fine-tuned on the Catalan CommonVoice v11 and ParlamentParla datasets. "
+                         "More information about results and evaluation can be found "
+                         "[here](https://huggingface.co/MaximilianChen/Casper).")
+
+ demo.launch()
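For quick verification outside the Gradio UI, the same pipeline can be exercised directly on one of the uploaded clips. A minimal sketch, assuming the repository root is the working directory; the model ID and chunk_length_s come from app.py above:

# Standalone smoke test for the Casper pipeline (not part of the commit);
# run from the repository root so the example WAV path resolves.
from transformers import pipeline
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
asr = pipeline(
    "automatic-speech-recognition",
    model="MaximilianChen/Casper",
    chunk_length_s=30,
    device=device,
)

# The pipeline accepts a filepath, matching the type="filepath" audio inputs.
print(asr("catalan_audio_examples/catalan_female_speech_1.wav")["text"])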
catalan_audio_examples/catalan_female_speech_1.wav ADDED
Binary file (557 kB).
 
catalan_audio_examples/catalan_female_speech_2.wav ADDED
Binary file (459 kB).
 
catalan_audio_examples/catalan_male_speech_1.wav ADDED
Binary file (778 kB).
 
examples.py ADDED
@@ -0,0 +1,6 @@
+ # List of examples for quick inference:
+ infer_from_audio_examples = [
+     ["catalan_audio_examples/catalan_female_speech_1.wav", None],
+     ["catalan_audio_examples/catalan_female_speech_2.wav", None],
+     ["catalan_audio_examples/catalan_male_speech_1.wav", None],
+ ]
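Each example row maps positionally onto the gr.Examples inputs [audio_from_file, audio_from_microphone], so the trailing None leaves the microphone slot empty and transcribe_audio takes the file branch. A hypothetical offline check of that wiring, assuming examples.py is importable from the working directory:

# Hypothetical check of the example wiring; not part of the Space itself.
from examples import infer_from_audio_examples

for file_path, mic in infer_from_audio_examples:
    # The mic slot is intentionally empty, so only the file input is set.
    assert mic is None
    print(f"Would transcribe: {file_path}")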
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ transformers
+ torch
+ torchaudio