mikr committed on
Commit
e473647
1 Parent(s): 9963f1b
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -1,16 +1,17 @@
1
  import gradio as gr
2
  import torch
3
- from transformers import pipeline
4
 
5
  MODEL_NAME = "mikr/w2v-bert-2.0-czech-colab-cv16"
6
  lang = "cs"
7
 
8
  device = 0 if torch.cuda.is_available() else "cpu"
 
 
 
 
9
  pipe = pipeline(
10
- task="automatic-speech-recognition",
11
  model=MODEL_NAME,
12
- chunk_length_s=30,
13
- device=device,
14
  )
15
 
16
  def transcribe(file_upload):
@@ -19,14 +20,21 @@ def transcribe(file_upload):
19
  return "ERROR: You have to either use the microphone or upload an audio file"
20
 
21
  file = file_upload
22
-
23
  text = pipe(file)["text"]
24
-
25
  return warn_output + text
26
 
 
 
 
 
 
 
 
 
 
27
 
28
  iface = gr.Interface(
29
- fn=transcribe,
30
  inputs=[
31
  gr.File(type="binary", label="Upload Audio File"), # Audio file upload
32
  ],
 
1
  import gradio as gr
2
  import torch
3
+ from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
4
 
5
  MODEL_NAME = "mikr/w2v-bert-2.0-czech-colab-cv16"
6
  lang = "cs"
7
 
8
  device = 0 if torch.cuda.is_available() else "cpu"
9
+
10
+ model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME).to(device)
11
+ processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
12
+
13
  pipe = pipeline(
 
14
  model=MODEL_NAME,
 
 
15
  )
16
 
17
  def transcribe(file_upload):
 
20
  return "ERROR: You have to either use the microphone or upload an audio file"
21
 
22
  file = file_upload
 
23
  text = pipe(file)["text"]
 
24
  return warn_output + text
25
 
26
def transcribe2(file_upload):
    """Transcribe an uploaded audio file with the CTC model (greedy decoding).

    Args:
        file_upload: Raw bytes of the uploaded audio file (the interface
            declares ``gr.File(type="binary")``), or ``None``/empty when
            nothing was uploaded.

    Returns:
        The decoded transcription string, or an error message when no audio
        was provided.

    Raises:
        ValueError: Propagated from ``ffmpeg_read`` when the payload cannot
            be decoded (e.g. ffmpeg missing or malformed audio).
    """
    if not file_upload:
        return "ERROR: You have to either use the microphone or upload an audio file"

    # Local import keeps this block self-contained; `transformers` is already
    # a dependency of the file.
    from transformers.pipelines.audio_utils import ffmpeg_read

    sampling_rate = 16000
    # Decode the uploaded bytes into a waveform at the rate the processor
    # is told to expect below.
    wav = ffmpeg_read(file_upload, sampling_rate)

    # NOTE(review): MODEL_NAME looks like a W2v-BERT 2.0 checkpoint; confirm the
    # module-level Wav2Vec2ForCTC / Wav2Vec2Processor classes (and the
    # `input_values` field) match that checkpoint.
    with torch.inference_mode():
        input_values = processor(wav, sampling_rate=sampling_rate).input_values[0]
        # Add the batch dimension the model expects and move to its device.
        input_values = torch.tensor(input_values, device=device).unsqueeze(0)
        logits = model(input_values).logits
        # Greedy CTC decoding: pick the most likely token at every frame.
        pred_ids = torch.argmax(logits, dim=-1)
        transcripts = processor.batch_decode(pred_ids)
    return transcripts[0]
34
+
35
 
36
  iface = gr.Interface(
37
+ fn=transcribe2,
38
  inputs=[
39
  gr.File(type="binary", label="Upload Audio File"), # Audio file upload
40
  ],