Harveenchadha committed on
Commit
ea8b34f
1 Parent(s): cb71106

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -3
app.py CHANGED
@@ -2,13 +2,24 @@ import soundfile as sf
2
  import torch
3
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
5
 
6
 
7
 
8
  def parse_transcription(wav_file):
9
- print("hello")
10
- audio_input, sample_rate = sf.read(wav_file.name)
11
- input_values = processor(audio_input, sampling_rate=sample_rate, return_tensors="pt").input_values
 
12
 
13
  logits = model(input_values).logits
14
  predicted_ids = torch.argmax(logits, dim=-1)
@@ -17,6 +28,11 @@ def parse_transcription(wav_file):
17
  return transcription
18
 
19
 
 
 
 
 
 
20
  processor = Wav2Vec2Processor.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
21
  model = Wav2Vec2ForCTC.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
22
 
 
2
  import torch
3
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
4
  import gradio as gr
5
+ import sox
6
+
7
+
8
+
9
+ def convert(inputfile, outfile):
10
+ sox_tfm = sox.Transformer()
11
+ sox_tfm.set_output_format(
12
+ file_type="wav", channels=1, encoding="signed-integer", rate=16000, bits=16
13
+ )
14
+ sox_tfm.build(inputfile, outfile)
15
 
16
 
17
 
18
  def parse_transcription(wav_file):
19
+ filename = wav_file.name.split('.')[0]
20
+ convert(wav_file.name, filename + "16k.wav")
21
+ speech, _ = sf.read(filename + "16k.wav")
22
+ input_values = processor(speech, sampling_rate=16_000, return_tensors="pt").input_values
23
 
24
  logits = model(input_values).logits
25
  predicted_ids = torch.argmax(logits, dim=-1)
 
28
  return transcription
29
 
30
 
31
+ processor = Wav2Vec2Processor.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
32
+ model = Wav2Vec2ForCTC.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
33
+
34
+
35
+
36
  processor = Wav2Vec2Processor.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
37
  model = Wav2Vec2ForCTC.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
38