SeyedAli committed
Commit: 7c8c991
Parent: 66237b4

Update app.py

Files changed (1):
app.py +2 -2
app.py CHANGED
@@ -9,7 +9,7 @@ import torchaudio
 processor = AutoProcessor.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
 model = AutoModelForCTC.from_pretrained("SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
 audio_input = gr.inputs.Audio(label="صوت گفتار فارسی", type="filepath")
-#text_output = gr.Textbox(label="متن فارسی", type="text")
+text_output = gr.TextArea(label="متن فارسی", type="text")
 def ASR(audio):
     pipe = pipeline("automatic-speech-recognition", model="SeyedAli/Persian-Speech-Transcription-Wav2Vec2-V1")
     with tempfile.NamedTemporaryFile(suffix=".wav") as temp_audio_file:
@@ -22,7 +22,7 @@ def ASR(audio):
     resampler = torchaudio.transforms.Resample(sample_rate, 16000)
     waveform = resampler(waveform)
     # Convert the audio to a single channel
-    waveform = torchaudio.functional.downmix_mono(waveform)
+    waveform = torch.mean(waveform, dim=0, keepdim=True)
     # Convert the PyTorch tensor to a NumPy ndarray
     audio_array = waveform.numpy()
     #inputs = processor(audio_array, sampling_rate=16_000)
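
For context, a minimal sketch of how the new text_output component could be wired to the ASR function in a Gradio interface. Only the two component definitions come from the commit; the placeholder ASR body, the gr.Interface call, and the launch are assumptions about the rest of app.py.

import gradio as gr

audio_input = gr.inputs.Audio(label="صوت گفتار فارسی", type="filepath")  # label: "Persian speech audio"
text_output = gr.TextArea(label="متن فارسی", type="text")  # label: "Persian text"

def ASR(audio):
    # Placeholder; the real transcription logic lives in app.py.
    return "..."

# Assumed wiring (not shown in the diff): connect the components to ASR.
demo = gr.Interface(fn=ASR, inputs=audio_input, outputs=text_output)
demo.launch()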
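
Likewise, a minimal sketch of the audio preprocessing inside ASR after this change, assuming the recording is loaded with torchaudio.load. The resampling, channel-averaging, and NumPy-conversion lines mirror the diff; the file path and load step are illustrative.

import torch
import torchaudio

# Load the recording; torchaudio returns a (channels, samples) tensor plus its sample rate.
waveform, sample_rate = torchaudio.load("speech.wav")  # hypothetical path

# Resample to the 16 kHz rate the Wav2Vec2 checkpoint expects.
resampler = torchaudio.transforms.Resample(sample_rate, 16000)
waveform = resampler(waveform)

# Downmix to a single channel by averaging across channels
# (the commit's replacement for the removed downmix_mono call).
waveform = torch.mean(waveform, dim=0, keepdim=True)

# Convert the PyTorch tensor to a NumPy ndarray for the processor/pipeline.
audio_array = waveform.numpy()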