camparchimedes committed on
Commit
5e4096f
1 Parent(s): 877e925

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -22
app.py CHANGED
@@ -15,14 +15,6 @@ processor = WhisperProcessor.from_pretrained("NbAiLabBeta/nb-whisper-medium")
15
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
16
  torch_dtype = torch.float32
17
 
18
- # Initialize pipeline
19
- #asr = pipeline("automatic-speech-recognition", model=model, tokenizer=processor.tokenizer, feature_extractor=processor.feature_extractor, device=device, torch_dtype=torch_dtype)
20
-
21
- #def transcribe_audio(audio_file):
22
- #with torch.no_grad():
23
- #output = asr(audio_file, chunk_length_s=28, generate_kwargs={"num_beams": 5, "task": "transcribe", "language": "no"})
24
- #return output["text"]
25
-
26
  def transcribe_audio(audio_file):
27
  audio_input, _ = sf.read(audio_file)
28
  inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt")
@@ -31,19 +23,17 @@ def transcribe_audio(audio_file):
31
  output = model.generate(
32
  inputs.input_features,
33
  max_length=448,
34
- chunk_length_s=28,
35
  num_beams=5,
36
  task="transcribe",
37
  language="no"
38
  )
39
  transcription = processor.batch_decode(output, skip_special_tokens=True)[0]
40
  return transcription
41
- #print(transcription)
42
 
43
  # HTML for banner image
44
  banner_html = """
45
  <div style="text-align: center;">
46
- <img src="https://huggingface.co/spaces/camparchimedes/work_harder/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" width="87%; height:auto;">
47
  </div>
48
  """
49
 
@@ -52,16 +42,12 @@ iface = gr.Blocks()
52
 
53
  with iface:
54
  gr.HTML(banner_html)
55
- gr.Interface(
56
- fn=transcribe_audio,
57
- inputs=gr.Audio(type="filepath"),
58
- outputs="text",
59
- title="Audio Transcription App",
60
- description="Upload an audio file to get the transcription",
61
- theme="default",
62
- layout="vertical",
63
- live=False
64
- )
65
 
66
  # Launch the interface
67
- iface.launch(share=True, debug=True)
 
15
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
16
  torch_dtype = torch.float32
17
 
 
 
 
 
 
 
 
 
18
  def transcribe_audio(audio_file):
19
  audio_input, _ = sf.read(audio_file)
20
  inputs = processor(audio_input, sampling_rate=16000, return_tensors="pt")
 
23
  output = model.generate(
24
  inputs.input_features,
25
  max_length=448,
 
26
  num_beams=5,
27
  task="transcribe",
28
  language="no"
29
  )
30
  transcription = processor.batch_decode(output, skip_special_tokens=True)[0]
31
  return transcription
 
32
 
33
  # HTML for banner image
34
  banner_html = """
35
  <div style="text-align: center;">
36
+ <img src="https://huggingface.co/spaces/camparchimedes/ola_s-audioshop/raw/main/Olas%20AudioSwitch%20Shop.png" alt="Banner" width="87%; height:auto;">
37
  </div>
38
  """
39
 
 
42
 
43
  with iface:
44
  gr.HTML(banner_html)
45
+ gr.Markdown("# Audio Transcription App\nUpload an audio file to get the transcription")
46
+ audio_input = gr.Audio(type="filepath")
47
+ transcription_output = gr.Textbox()
48
+ transcribe_button = gr.Button("Transcribe")
49
+
50
+ transcribe_button.click(fn=transcribe_audio, inputs=audio_input, outputs=transcription_output)
 
 
 
 
51
 
52
  # Launch the interface
53
+ iface.launch(share=True, debug=True)