camparchimedes committed on
Commit
592f7e1
1 Parent(s): f691af5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -4
app.py CHANGED
@@ -1,7 +1,67 @@
1
  import gradio as gr
 
 
 
 
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
- demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
+ import warnings
3
+ import torch
4
+ from transformers import WhisperTokenizer, WhisperForConditionalGeneration, WhisperProcessor
5
+ import soundfile as sf
6
 
7
# Suppress all Python warnings raised from here on (process-wide filter).
warnings.filterwarnings("ignore")

# Load tokenizer, model and processor; all three come from the same checkpoint.
tokenizer = WhisperTokenizer.from_pretrained("NbAiLabBeta/nb-whisper-medium")
model = WhisperForConditionalGeneration.from_pretrained("NbAiLabBeta/nb-whisper-medium")
processor = WhisperProcessor.from_pretrained("NbAiLabBeta/nb-whisper-medium")

# Set up the device: use the GPU when one is available, otherwise the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
torch_dtype = torch.float32

# Keep the weights on the selected device. Inference inputs must live on the
# same device as the model, otherwise generation fails on a CUDA host.
model.to(device)
17
+
18
+ # Initialize pipeline
19
+ #asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=device, torch_dtype=torch_dtype)
20
+
21
+ #def transcribe_audio(audio_file):
22
+ #with torch.no_grad():
23
+ #output = asr(audio_file, chunk_length_s=28, generate_kwargs={"num_beams": 5, "task": "transcribe", "language": "no"})
24
+ #return output["text"]
25
+
26
def transcribe_audio(audio_file):
    """Transcribe one audio file with the module-level Whisper model.

    Args:
        audio_file: Path to an audio file that ``soundfile`` can read.

    Returns:
        The decoded text of the first sequence produced by ``model.generate``
        (task "transcribe", language "no"), with special tokens removed.

    Raises:
        ValueError: If the file's sample rate differs from the rate the
            processor's feature extractor expects.
    """
    # Keep the file's real sample rate instead of discarding it.
    audio_input, sample_rate = sf.read(audio_file, dtype="float32")

    # soundfile returns a 2-D (frames, channels) array for multi-channel
    # files; average the channels to get a single 1-D waveform.
    if audio_input.ndim > 1:
        audio_input = audio_input.mean(axis=1)

    # Fail loudly on a rate mismatch. Declaring a rate the audio does not
    # actually have would make the model hear it at the wrong speed.
    expected_rate = processor.feature_extractor.sampling_rate
    if sample_rate != expected_rate:
        raise ValueError(
            f"Audio is sampled at {sample_rate} Hz but the model expects "
            f"{expected_rate} Hz; please resample the file first."
        )

    inputs = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt")
    # Follow the model's own device so inputs and weights always agree.
    input_features = inputs.input_features.to(model.device)

    # `chunk_length_s` is an ASR-pipeline option, not a `generate()` argument,
    # so it is not passed here.
    # NOTE(review): with default processor settings only a single fixed-length
    # window of audio is presumably transcribed - confirm for long recordings.
    with torch.no_grad():
        output = model.generate(
            input_features,
            max_length=448,
            num_beams=5,
            task="transcribe",
            language="no",
        )

    return processor.batch_decode(output, skip_special_tokens=True)[0]
41
+ #print(transcription)
42
+
43
+ # HTML for banner image
44
# Sizing lives in `style`: the `width` attribute takes a single length and
# cannot carry CSS declarations such as `height:auto`.
banner_html = """
<div style="text-align: center;">
    <img src="https://huggingface.co/spaces/camparchimedes/work_harder/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" style="width: 87%; height: auto;">
</div>
"""
49
+
50
+ # Create Gradio interface
51
# Build the page inside a Blocks container: the banner first, then the
# transcription form. Components are created inside the context, in the same
# order as before (banner, audio input, interface).
with gr.Blocks() as iface:
    gr.HTML(banner_html)
    audio_upload = gr.Audio(type="filepath")
    gr.Interface(
        fn=transcribe_audio,
        inputs=audio_upload,
        outputs="text",
        title="Audio Transcription App",
        description="Upload an audio file to get the transcription",
        theme="default",
        layout="vertical",
        live=False,
    )

# Launch the interface
iface.launch(share=True, debug=True)