Spaces:
Sleeping
Sleeping
trans2
Browse files
app.py
CHANGED
@@ -1,16 +1,17 @@
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
-
from transformers import pipeline
|
4 |
|
5 |
MODEL_NAME = "mikr/w2v-bert-2.0-czech-colab-cv16"
|
6 |
lang = "cs"
|
7 |
|
8 |
device = 0 if torch.cuda.is_available() else "cpu"
|
|
|
|
|
|
|
|
|
9 |
pipe = pipeline(
|
10 |
-
task="automatic-speech-recognition",
|
11 |
model=MODEL_NAME,
|
12 |
-
chunk_length_s=30,
|
13 |
-
device=device,
|
14 |
)
|
15 |
|
16 |
def transcribe(file_upload):
|
@@ -19,14 +20,21 @@ def transcribe(file_upload):
|
|
19 |
return "ERROR: You have to either use the microphone or upload an audio file"
|
20 |
|
21 |
file = file_upload
|
22 |
-
|
23 |
text = pipe(file)["text"]
|
24 |
-
|
25 |
return warn_output + text
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
iface = gr.Interface(
|
29 |
-
fn=
|
30 |
inputs=[
|
31 |
gr.File(type="binary", label="Upload Audio File"), # Audio file upload
|
32 |
],
|
|
|
1 |
import gradio as gr
|
2 |
import torch
|
3 |
+
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
|
4 |
|
5 |
MODEL_NAME = "mikr/w2v-bert-2.0-czech-colab-cv16"
|
6 |
lang = "cs"
|
7 |
|
8 |
device = 0 if torch.cuda.is_available() else "cpu"
|
9 |
+
|
10 |
+
model = Wav2Vec2ForCTC.from_pretrained(MODEL_NAME).to(device)
|
11 |
+
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
|
12 |
+
|
13 |
pipe = pipeline(
|
|
|
14 |
model=MODEL_NAME,
|
|
|
|
|
15 |
)
|
16 |
|
17 |
def transcribe(file_upload):
|
|
|
20 |
return "ERROR: You have to either use the microphone or upload an audio file"
|
21 |
|
22 |
file = file_upload
|
|
|
23 |
text = pipe(file)["text"]
|
|
|
24 |
return warn_output + text
|
25 |
|
26 |
+
def transcribe2(file_upload):
    """Transcribe an audio waveform with the Wav2Vec2 CTC model directly.

    Parameters
    ----------
    file_upload : audio samples to transcribe.
        NOTE(review): assumed to be a decoded 1-D waveform at 16 kHz — the
        processor call below passes ``sampling_rate=16000``. Confirm the
        Gradio ``gr.File(type="binary")`` input is decoded before reaching
        here; raw file bytes would need decoding first.

    Returns
    -------
    str
        The greedy CTC transcription.
    """
    # Bug fix: the original body referenced an undefined name ``wav`` and
    # never used the ``file_upload`` parameter, raising NameError on every
    # call. Bind the parameter to the name the body consumes.
    wav = file_upload
    # inference_mode disables autograd bookkeeping for faster, lighter inference.
    with torch.inference_mode():
        # Feature-extract to model inputs; the processor expects 16 kHz audio.
        input_values = processor(wav, sampling_rate=16000).input_values[0]
        input_values = torch.tensor(input_values, device=device).unsqueeze(0)
        logits = model(input_values).logits
        # Greedy CTC decode: pick the most likely token id per frame.
        pred_ids = torch.argmax(logits, dim=-1)
        xcp = processor.batch_decode(pred_ids)
        return xcp[0]
|
34 |
+
|
35 |
|
36 |
iface = gr.Interface(
|
37 |
+
fn=transcribe2,
|
38 |
inputs=[
|
39 |
gr.File(type="binary", label="Upload Audio File"), # Audio file upload
|
40 |
],
|