nunenuh commited on
Commit
4b068e8
1 Parent(s): 80a0e95

fix: fix error when generating text

Browse files
app.py CHANGED
@@ -16,7 +16,7 @@ demo = gr.Interface(
16
  choices=["indonesian","english"],
17
  value="indonesian"),
18
  gr.Audio(label="Speak", source="microphone", type="numpy"),
19
- gr.Audio(label="Upload audio", source="upload", type="numpy"),
20
  ],
21
  outputs=[gr.TextArea(label="Output Text"),],
22
  title="OpenAI Whisper Base",
 
16
  choices=["indonesian","english"],
17
  value="indonesian"),
18
  gr.Audio(label="Speak", source="microphone", type="numpy"),
19
+ gr.Audio(label="Upload Audio", source="upload", type="numpy"),
20
  ],
21
  outputs=[gr.TextArea(label="Output Text"),],
22
  title="OpenAI Whisper Base",
src/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (147 Bytes). View file
 
src/__pycache__/infer.cpython-310.pyc ADDED
Binary file (1.14 kB). View file
 
src/__pycache__/utils.cpython-310.pyc ADDED
Binary file (940 Bytes). View file
 
src/infer.py CHANGED
@@ -3,7 +3,7 @@ from typing import *
3
  from src import utils
4
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
5
 
6
- model_name: str = f"openai/whisper-base"
7
  processor: Any = WhisperProcessor.from_pretrained(model_name)
8
  model: Any = WhisperForConditionalGeneration.from_pretrained(model_name)
9
 
 
3
  from src import utils
4
  from transformers import WhisperProcessor, WhisperForConditionalGeneration
5
 
6
+ model_name: str = f"openai/whisper-small"
7
  processor: Any = WhisperProcessor.from_pretrained(model_name)
8
  model: Any = WhisperForConditionalGeneration.from_pretrained(model_name)
9
 
src/utils.py CHANGED
@@ -7,16 +7,18 @@ sample_rate: int = 16000
7
  float_factor: float = 32678.0
8
 
9
  def preprocess_audio(sampling_rate, waveform):
10
- waveform = waveform / float_factor
11
 
12
- if len(waveform) > 1:
13
  waveform = librosa.to_mono(waveform.T)
14
 
15
  if sampling_rate != sample_rate:
16
  waveform = librosa.resample(waveform, orig_sr=sampling_rate, target_sr=sample_rate)
17
 
18
- waveform = waveform[:sample_rate*30]
19
- waveform = torch.tensor(waveform)
 
 
20
  return waveform
21
 
22
 
 
7
  float_factor: float = 32678.0
8
 
9
  def preprocess_audio(sampling_rate, waveform):
10
+ waveform: float = waveform / float_factor
11
 
12
+ if len(waveform.shape) > 1:
13
  waveform = librosa.to_mono(waveform.T)
14
 
15
  if sampling_rate != sample_rate:
16
  waveform = librosa.resample(waveform, orig_sr=sampling_rate, target_sr=sample_rate)
17
 
18
+ # limit to 30 seconds
19
+ waveform: float = waveform[:sample_rate * 30]
20
+
21
+ waveform: float = torch.tensor(waveform)
22
  return waveform
23
 
24