clr committed on
Commit
5adac55
1 Parent(s): d0b4e13

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -11
app.py CHANGED
@@ -15,22 +15,34 @@ processor_is = Wav2Vec2Processor.from_pretrained(MODEL_IS)
15
  model_fo = Wav2Vec2ForCTC.from_pretrained(MODEL_FO).to(device)
16
  processor_fo = Wav2Vec2Processor.from_pretrained(MODEL_FO)
17
 
18
- def recc(audio_file,model,processor):
19
- wav, sr = sf.read(audio_file, dtype=np.float32)
20
  if len(wav.shape) == 2:
21
  wav = wav.mean(1)
22
  if sr != 16000:
23
  wlen = int(wav.shape[0] / sr * 16000)
24
  wav = signal.resample(wav, wlen)
25
-
 
 
 
26
  with torch.inference_mode():
27
- input_values = processor(wav,sampling_rate=16000).input_values[0]
28
  input_values = torch.tensor(input_values, device=device).unsqueeze(0)
29
- logits = model(input_values).logits
30
  pred_ids = torch.argmax(logits, dim=-1)
31
- xcp = processor.batch_decode(pred_ids)
32
  return xcp
33
 
 
 
 
 
 
 
 
 
 
34
 
35
  bl = gr.Blocks()
36
  with bl:
@@ -50,16 +62,16 @@ with bl:
50
  with gr.Tabs():
51
  with gr.TabItem("Icelandic"):
52
  with gr.Row():
53
- audio_filei = gr.Audio(type="filepath")
54
- text_outputi = gr.Textbox()
55
- text_buttoni = gr.Button("Recognise")
56
- text_buttoni.click(recc, inputs=[audio_filei,model_is,processor_is], outputs=text_outputi)
57
  with gr.TabItem("Faroese"):
58
  with gr.Row():
59
  audio_file = gr.Audio(type="filepath")
60
  text_output = gr.Textbox()
61
  text_button = gr.Button("Recognise")
62
- text_button.click(recc, inputs=[audio_file,model_fo,processor_fo], outputs=text_output)
63
 
64
  bl.launch()
65
 
 
15
  model_fo = Wav2Vec2ForCTC.from_pretrained(MODEL_FO).to(device)
16
  processor_fo = Wav2Vec2Processor.from_pretrained(MODEL_FO)
17
 
18
def readwav(a_f):
    """Load an audio file and return it as a mono float32 waveform at 16 kHz.

    Multi-channel recordings are averaged down to a single channel, and any
    other sample rate is resampled to 16 kHz — the rate the downstream
    wav2vec2 processors are called with.
    """
    wav, sr = sf.read(a_f, dtype=np.float32)
    # Collapse stereo / multi-channel audio to mono by averaging channels.
    if wav.ndim == 2:
        wav = wav.mean(1)
    # Resample only when the source rate differs from the expected 16 kHz.
    if sr != 16000:
        target_len = int(wav.shape[0] / sr * 16000)
        wav = signal.resample(wav, target_len)
    return wav
26
+
27
def recis(audio_file):
    """Transcribe an audio file with the Icelandic wav2vec2 model.

    The file is loaded and normalised via ``readwav``, run through
    ``model_is`` on ``device``, and the greedy (argmax) CTC prediction is
    decoded with ``processor_is``. Returns the batch-decoded transcription.
    """
    wav = readwav(audio_file)
    # inference_mode disables autograd bookkeeping — cheaper than no_grad.
    with torch.inference_mode():
        features = processor_is(wav, sampling_rate=16000).input_values[0]
        batch = torch.tensor(features, device=device).unsqueeze(0)
        logits = model_is(batch).logits
        pred_ids = torch.argmax(logits, dim=-1)
        transcript = processor_is.batch_decode(pred_ids)
    return transcript
36
 
37
def recfo(audio_file):
    """Transcribe an audio file with the Faroese wav2vec2 model.

    Mirrors ``recis`` but routes through ``processor_fo`` / ``model_fo``:
    load + normalise audio, greedy (argmax) CTC decode, return the
    batch-decoded transcription.
    """
    wav = readwav(audio_file)
    # inference_mode disables autograd bookkeeping — cheaper than no_grad.
    with torch.inference_mode():
        features = processor_fo(wav, sampling_rate=16000).input_values[0]
        batch = torch.tensor(features, device=device).unsqueeze(0)
        logits = model_fo(batch).logits
        pred_ids = torch.argmax(logits, dim=-1)
        transcript = processor_fo.batch_decode(pred_ids)
    return transcript
46
 
47
  bl = gr.Blocks()
48
  with bl:
 
62
  with gr.Tabs():
63
  with gr.TabItem("Icelandic"):
64
  with gr.Row():
65
+ audio_file = gr.Audio(type="filepath")
66
+ text_output = gr.Textbox()
67
+ text_button = gr.Button("Recognise")
68
+ text_button.click(recis, inputs=audio_file, outputs=text_output)
69
  with gr.TabItem("Faroese"):
70
  with gr.Row():
71
  audio_file = gr.Audio(type="filepath")
72
  text_output = gr.Textbox()
73
  text_button = gr.Button("Recognise")
74
+ text_button.click(recfo, inputs=audio_file, outputs=text_output)
75
 
76
  bl.launch()
77