Harveenchadha committed on
Commit
4f9fa63
1 Parent(s): 0b02e4c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -5
app.py CHANGED
@@ -21,10 +21,26 @@ def read_file(wav):
21
  return resampled_signal
22
 
23
 
24
- def parse_transcription(wav_file):
 
 
 
 
 
 
 
 
 
 
 
 
25
  filename = wav_file.split('.')[0]
26
  convert(wav_file, filename + "16k.wav")
27
  speech, _ = sf.read(filename + "16k.wav")
 
 
 
 
28
 
29
 
30
  #speech = read_file(wav_file)
@@ -36,9 +52,10 @@ def parse_transcription(wav_file):
36
  transcription = processor.decode(predicted_ids[0], skip_special_tokens=True)
37
  return transcription
38
 
39
-
40
- processor = Wav2Vec2Processor.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
41
- model = Wav2Vec2ForCTC.from_pretrained("Harveenchadha/vakyansh-wav2vec2-hindi-him-4200")
 
42
 
43
 
44
 
@@ -49,6 +66,8 @@ txtbox = gr.Textbox(
49
  lines=5
50
  )
51
 
52
- gr.Interface(parse_transcription, inputs = input_, outputs=txtbox,
 
 
53
  streaming=True, interactive=True,
54
  analytics_enabled=False, show_tips=False, enable_queue=True).launch(inline=False);
 
21
  return resampled_signal
22
 
23
 
24
def parse_transcription_with_lm(wav_file):
    """Transcribe *wav_file* with the CTC model, decoding through the LM-boosted processor.

    Parameters
    ----------
    wav_file : str
        Path to the input audio file; it is resampled to 16 kHz by convert_file.

    Returns
    -------
    The decoded transcription(s) from the LM beam-search decoder
    (`.text` of the Wav2Vec2DecoderWithLMOutput).
    """
    speech = convert_file(wav_file)
    # Bug fix: the original passed an undefined `batch["speech"]` here;
    # the waveform is the `speech` array returned by convert_file.
    inputs = processor(speech, sampling_rate=16_000, return_tensors="pt", padding=True)

    # Inference only — no gradients needed.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Bug fix: LM decoding must use the Wav2Vec2ProcessorWithLM instance —
    # the plain `processor.batch_decode` expects token ids, not raw logits,
    # and would not apply the language model at all.
    int_result = processor_with_LM.batch_decode(logits.cpu().numpy())

    transcription = int_result.text
    return transcription
34
+
35
+
36
def convert_file(wav_file):
    """Resample *wav_file* to 16 kHz and return the loaded waveform.

    The converted audio is written next to the original as
    ``<base>16k.wav`` (where ``<base>`` is everything before the first dot),
    then read back with soundfile.
    """
    base = wav_file.partition('.')[0]
    target = base + "16k.wav"
    convert(wav_file, target)
    samples, _ = sf.read(target)
    return samples
41
+
42
+ def parse_transcription(wav_file):
43
+ speech = convert_file(wav_file)
44
 
45
 
46
  #speech = read_file(wav_file)
 
52
  transcription = processor.decode(predicted_ids[0], skip_special_tokens=True)
53
  return transcription
54
 
55
# Load the acoustic model and both processors (plain CTC decoding and the
# LM-boosted decoder) once at module import, shared by both transcription paths.
+ model_id = "Harveenchadha/vakyansh-wav2vec2-hindi-him-4200"
56
+ processor = Wav2Vec2Processor.from_pretrained(model_id)
57
+ processor_with_LM = Wav2Vec2ProcessorWithLM.from_pretrained(model_id)
58
+ model = Wav2Vec2ForCTC.from_pretrained(model_id)
59
 
60
 
61
 
 
66
  lines=5
67
  )
68
 
69
# Checkbox letting the user opt into language-model-boosted decoding.
chkbox = gr.Checkbox(label="Apply LM", value=False)

# Bug fix: the original referenced `chckbox`, a typo for the `chkbox`
# defined above, which would raise NameError at startup.
# NOTE(review): with two inputs Gradio calls fn(wav_file, apply_lm), so
# parse_transcription must accept the checkbox value (and presumably
# dispatch to parse_transcription_with_lm when it is True) — confirm
# its signature matches.
gr.Interface(parse_transcription, inputs=[input_, chkbox], outputs=txtbox,
             streaming=True, interactive=True,
             analytics_enabled=False, show_tips=False, enable_queue=True).launch(inline=False)