Spaces:

camparchimedes
/

nb

Build error

App Files Files

camparchimedes committed on Aug 6

Commit

592f7e1

•

1 Parent(s): f691af5

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -4

app.py CHANGED Viewed

@@ -1,7 +1,67 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

 import gradio as gr
+import warnings
+import torch
+from transformers import WhisperTokenizer, WhisperForConditionalGeneration, WhisperProcessor
+import soundfile as sf
+warnings.filterwarnings("ignore")
+# Checkpoint shared by the tokenizer, model and processor below.
+MODEL_ID = "NbAiLabBeta/nb-whisper-medium"
+# Load tokenizer and model
+tokenizer = WhisperTokenizer.from_pretrained(MODEL_ID)
+model = WhisperForConditionalGeneration.from_pretrained(MODEL_ID)
+processor = WhisperProcessor.from_pretrained(MODEL_ID)
+# Set up the device
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+torch_dtype = torch.float32
+# The model must live on the same device the inputs are sent to; otherwise
+# generate() fails with a device mismatch as soon as CUDA is available.
+model.to(device)
+# Alternative (disabled): chunked transcription through the ASR pipeline.
+# NOTE: enabling this also requires `from transformers import pipeline`.
+#asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=device, torch_dtype=torch_dtype)
+#def transcribe_audio(audio_file):
+    #with torch.no_grad():
+        #output = asr(audio_file, chunk_length_s=28, generate_kwargs={"num_beams": 5, "task": "transcribe", "language": "no"})
+    #return output["text"]
+def transcribe_audio(audio_file):
+    """Transcribe an audio file to text with the loaded Whisper model.
+
+    Args:
+        audio_file: Path to an audio file readable by ``soundfile``; a falsy
+            value (nothing uploaded) yields an empty transcription.
+
+    Returns:
+        The decoded transcription of the clip as a string.
+
+    NOTE(review): the Whisper feature extractor presumably pads/truncates to
+    a single ~30 s window, so longer recordings would be cut off -- confirm,
+    and prefer the chunked ASR pipeline for long-form audio.
+    """
+    import numpy as np  # local import: only needed for the resampling below
+
+    if not audio_file:
+        return ""
+    audio_input, sample_rate = sf.read(audio_file, dtype="float32")
+    # soundfile returns (frames, channels) for multi-channel files, while the
+    # feature extractor expects a mono 1-D waveform.
+    if audio_input.ndim > 1:
+        audio_input = audio_input.mean(axis=1)
+    # Use the file's real sample rate instead of assuming 16 kHz: feeding e.g.
+    # 44.1 kHz samples labelled as 16 kHz yields slowed-down, garbled input.
+    target_rate = processor.feature_extractor.sampling_rate
+    if sample_rate != target_rate and len(audio_input) > 0:
+        n_out = max(1, int(round(len(audio_input) * target_rate / sample_rate)))
+        src_t = np.arange(len(audio_input)) / sample_rate
+        dst_t = np.arange(n_out) / target_rate
+        # Dependency-free linear resampling; swap in a polyphase resampler
+        # if a dedicated audio library becomes available.
+        audio_input = np.interp(dst_t, src_t, audio_input).astype(np.float32)
+    inputs = processor(audio_input, sampling_rate=target_rate, return_tensors="pt")
+    # Follow the model's actual device so inputs and weights always match.
+    input_features = inputs.input_features.to(model.device)
+    with torch.no_grad():
+        # `chunk_length_s` is an ASR-pipeline option, not a generate()
+        # argument, so it is intentionally not passed here.
+        output = model.generate(
+            input_features,
+            max_length=448,
+            num_beams=5,
+            task="transcribe",
+            language="no",
+        )
+    return processor.batch_decode(output, skip_special_tokens=True)[0]
+# HTML for banner image (sizing goes in `style`; the `width` attribute
+# cannot carry CSS declarations).
+banner_html = """
+<div style="text-align: center;">
+    <img src="https://huggingface.co/spaces/camparchimedes/work_harder/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" style="width: 87%; height: auto;">
+</div>
+"""
+# Create Gradio interface: a Blocks page with the banner above the
+# transcription form.
+iface = gr.Blocks()
+with iface:
+    gr.HTML(banner_html)
+    # `layout` is no longer a gr.Interface keyword, so it is not passed; the
+    # form renders with the default layout.
+    gr.Interface(
+        fn=transcribe_audio,
+        inputs=gr.Audio(type="filepath"),
+        outputs="text",
+        title="Audio Transcription App",
+        description="Upload an audio file to get the transcription",
+        theme="default",
+        live=False,
+    )
+# Launch the interface
+iface.launch(share=True, debug=True)