Update app.py
Browse files
app.py
CHANGED
@@ -15,22 +15,34 @@ processor_is = Wav2Vec2Processor.from_pretrained(MODEL_IS)
|
|
15 |
model_fo = Wav2Vec2ForCTC.from_pretrained(MODEL_FO).to(device)
|
16 |
processor_fo = Wav2Vec2Processor.from_pretrained(MODEL_FO)
|
17 |
|
18 |
-
def
|
19 |
-
wav, sr = sf.read(
|
20 |
if len(wav.shape) == 2:
|
21 |
wav = wav.mean(1)
|
22 |
if sr != 16000:
|
23 |
wlen = int(wav.shape[0] / sr * 16000)
|
24 |
wav = signal.resample(wav, wlen)
|
25 |
-
|
|
|
|
|
|
|
26 |
with torch.inference_mode():
|
27 |
-
input_values =
|
28 |
input_values = torch.tensor(input_values, device=device).unsqueeze(0)
|
29 |
-
logits =
|
30 |
pred_ids = torch.argmax(logits, dim=-1)
|
31 |
-
xcp =
|
32 |
return xcp
|
33 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
bl = gr.Blocks()
|
36 |
with bl:
|
@@ -50,16 +62,16 @@ with bl:
|
|
50 |
with gr.Tabs():
|
51 |
with gr.TabItem("Icelandic"):
|
52 |
with gr.Row():
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
with gr.TabItem("Faroese"):
|
58 |
with gr.Row():
|
59 |
audio_file = gr.Audio(type="filepath")
|
60 |
text_output = gr.Textbox()
|
61 |
text_button = gr.Button("Recognise")
|
62 |
-
text_button.click(
|
63 |
|
64 |
bl.launch()
|
65 |
|
|
|
15 |
model_fo = Wav2Vec2ForCTC.from_pretrained(MODEL_FO).to(device)
|
16 |
processor_fo = Wav2Vec2Processor.from_pretrained(MODEL_FO)
|
17 |
|
18 |
+
def readwav(a_f, target_sr=16000):
    """Read an audio file and return a mono float32 waveform at ``target_sr`` Hz.

    Parameters
    ----------
    a_f : str or file-like
        Path (or object) accepted by ``soundfile.read``.
    target_sr : int, optional
        Desired sample rate of the returned waveform. Defaults to 16000,
        the rate the wav2vec2 processors in this app expect.

    Returns
    -------
    numpy.ndarray
        1-D float32 waveform at ``target_sr`` Hz.
    """
    wav, sr = sf.read(a_f, dtype=np.float32)
    if len(wav.shape) == 2:
        # Down-mix multi-channel audio to mono by averaging the channel axis.
        wav = wav.mean(1)
    if sr != target_sr:
        # Number of samples the clip occupies at the target rate.
        wlen = int(wav.shape[0] / sr * target_sr)
        wav = signal.resample(wav, wlen)
    return wav
|
26 |
+
|
27 |
+
def recis(audio_file):
    """Transcribe *audio_file* with the Icelandic wav2vec2 CTC model.

    Returns the list of decoded strings produced by
    ``processor_is.batch_decode`` (one entry for the single input clip).
    """
    wav = readwav(audio_file)
    with torch.inference_mode():
        # Feature-extract at 16 kHz, then batch a single example.
        feats = processor_is(wav, sampling_rate=16000).input_values[0]
        batch = torch.tensor(feats, device=device).unsqueeze(0)
        # Forward pass followed by greedy (argmax) CTC decoding.
        logits = model_is(batch).logits
        pred_ids = torch.argmax(logits, dim=-1)
        xcp = processor_is.batch_decode(pred_ids)
    return xcp
|
36 |
|
37 |
+
def recfo(audio_file):
    """Transcribe *audio_file* with the Faroese wav2vec2 CTC model.

    Returns the list of decoded strings produced by
    ``processor_fo.batch_decode`` (one entry for the single input clip).
    """
    wav = readwav(audio_file)
    with torch.inference_mode():
        # Feature-extract at 16 kHz, then batch a single example.
        feats = processor_fo(wav, sampling_rate=16000).input_values[0]
        batch = torch.tensor(feats, device=device).unsqueeze(0)
        # Forward pass followed by greedy (argmax) CTC decoding.
        logits = model_fo(batch).logits
        pred_ids = torch.argmax(logits, dim=-1)
        xcp = processor_fo.batch_decode(pred_ids)
    return xcp
|
46 |
|
47 |
bl = gr.Blocks()
|
48 |
with bl:
|
|
|
62 |
    # UI: one tab per language; each tab pairs an audio input with a
    # transcript textbox and wires a "Recognise" button to the matching
    # recogniser function (recis / recfo). This section sits inside the
    # `with bl:` Blocks context opened earlier in the file.
    with gr.Tabs():
        with gr.TabItem("Icelandic"):
            with gr.Row():
                audio_file = gr.Audio(type="filepath")
                text_output = gr.Textbox()
            # NOTE(review): the exact nesting of the button relative to the
            # Row is not visible in this diff rendering — confirm in the file.
            text_button = gr.Button("Recognise")
            text_button.click(recis, inputs=audio_file, outputs=text_output)
        with gr.TabItem("Faroese"):
            with gr.Row():
                # These names intentionally shadow the Icelandic tab's
                # components; each click handler was already bound above,
                # so the reuse is harmless.
                audio_file = gr.Audio(type="filepath")
                text_output = gr.Textbox()
            text_button = gr.Button("Recognise")
            text_button.click(recfo, inputs=audio_file, outputs=text_output)

# Start the Gradio app (blocking call).
bl.launch()
|
77 |
|