Spaces: Build error
Commit 5e4096f (1 parent: 877e925)
Update app.py
Browse files
app.py (CHANGED)
@@ -15,14 +15,6 @@ processor = WhisperProcessor.from_pretrained("NbAiLabBeta/nb-whisper-medium")
|
|
15 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
16 |
torch_dtype = torch.float32
|
17 |
|
18 |
-
# Initialize pipeline
|
19 |
-
#asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=device, torch_dtype=torch_dtype)
|
20 |
-
|
21 |
-
#def transcribe_audio(audio_file):
|
22 |
-
#with torch.no_grad():
|
23 |
-
#output = asr(audio_file, chunk_length_s=28, generate_kwargs={"num_beams": 5, "task": "transcribe", "language": "no"})
|
24 |
-
#return output["text"]
|
25 |
-
|
26 |
def transcribe_audio(audio_file):
|
27 |
audio_input, _ = sf.read(audio_file)
|
28 |
inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt")
|
@@ -31,19 +23,17 @@ def transcribe_audio(audio_file):
|
|
31 |
output = model.generate(
|
32 |
inputs.input_features,
|
33 |
max_length=448,
|
34 |
-
chunk_length_s=28,
|
35 |
num_beams=5,
|
36 |
task="transcribe",
|
37 |
language="no"
|
38 |
)
|
39 |
transcription = processor.batch_decode(output, skip_special_tokens=True)[0]
|
40 |
return transcription
|
41 |
-
#print(transcription)
|
42 |
|
43 |
# HTML for banner image
|
44 |
banner_html = """
|
45 |
<div style="text-align: center;">
|
46 |
-
<img src="https://huggingface.co/spaces/camparchimedes/
|
47 |
</div>
|
48 |
"""
|
49 |
|
@@ -52,16 +42,12 @@ iface = gr.Blocks()
|
|
52 |
|
53 |
with iface:
|
54 |
gr.HTML(banner_html)
|
55 |
-
gr.
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
theme="default",
|
62 |
-
layout="vertical",
|
63 |
-
live=False
|
64 |
-
)
|
65 |
|
66 |
# Launch the interface
|
67 |
-
iface.launch(share=True, debug=True)
|
|
|
15 |
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
|
16 |
torch_dtype = torch.float32
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
def transcribe_audio(audio_file):
|
19 |
audio_input, _ = sf.read(audio_file)
|
20 |
inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt")
|
|
|
23 |
output = model.generate(
|
24 |
inputs.input_features,
|
25 |
max_length=448,
|
|
|
26 |
num_beams=5,
|
27 |
task="transcribe",
|
28 |
language="no"
|
29 |
)
|
30 |
transcription = processor.batch_decode(output, skip_special_tokens=True)[0]
|
31 |
return transcription
|
|
|
32 |
|
33 |
# HTML for banner image
|
34 |
banner_html = """
|
35 |
<div style="text-align: center;">
|
36 |
+
<img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" width="87%; height:auto;">
|
37 |
</div>
|
38 |
"""
|
39 |
|
|
|
42 |
|
43 |
with iface:
|
44 |
gr.HTML(banner_html)
|
45 |
+
gr.Markdown("# Audio Transcription App\nUpload an audio file to get the transcription")
|
46 |
+
audio_input = gr.Audio(type="filepath")
|
47 |
+
transcription_output = gr.Textbox()
|
48 |
+
transcribe_button = gr.Button("Transcribe")
|
49 |
+
|
50 |
+
transcribe_button.click(fn=transcribe_audio, inputs=audio_input, outputs=transcription_output)
|
|
|
|
|
|
|
|
|
51 |
|
52 |
# Launch the interface
|
53 |
+
iface.launch(share=True, debug=True)
|