File size: 909 Bytes
9d3afd7
b004a55
3a6c9ee
 
b004a55
177ad11
b004a55
9d3afd7
36bd290
 
 
 
 
 
 
b004a55
36bd290
9d3afd7
36bd290
 
 
9d3afd7
36bd290
 
 
9d3afd7
36bd290
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
from transformers import pipeline
from transformers import  WhisperProcessor, WhisperForConditionalGeneration
import gradio as gr

# Load the fine-tuned Whisper checkpoint and its matching processor from the
# Hugging Face Hub. Both use the same repository id.
model = WhisperForConditionalGeneration.from_pretrained("MaximilianChen/Casper")
# NOTE(review): language/task are forwarded to the processor's from_pretrained;
# presumably they configure the tokenizer for Catalan transcription -- confirm.
processor = WhisperProcessor.from_pretrained("MaximilianChen/Casper", language='catalan', task='transcribe')
# `model` is passed as an already-instantiated object, so the pipeline has no
# repository id to infer preprocessing components from. Hand it the tokenizer
# and feature extractor explicitly. (The previous `pprocessor=` keyword was a
# typo and never attached the processor to the pipeline.)
asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
)

def transcribe_audio(mic=None, file=None):
    """Transcribe a microphone recording or an uploaded audio input.

    When both inputs are supplied, the microphone recording wins.

    Args:
        mic: Audio from the microphone input, or None when absent.
        file: Audio from the upload input, or None when absent.

    Returns:
        The "text" entry of the result produced by the module-level ``asr``
        callable, or a message asking for input when both arguments are None.
    """
    # Prefer the microphone input; fall back to the uploaded file.
    chosen = file if mic is None else mic
    if chosen is None:
        return "You must either provide a mic recording or a file"
    result = asr(chosen)
    return result["text"]


# Two audio inputs, both optional: one recorded from the microphone and one
# uploaded. Each is configured with type="filepath". Their order matches the
# (mic, file) parameter order of transcribe_audio.
mic_input = gr.Audio(source="microphone", type="filepath", optional=True)
upload_input = gr.Audio(source="upload", type="filepath", optional=True)

# Build the interface around transcribe_audio with a plain-text output, then
# start serving it.
demo = gr.Interface(
    fn=transcribe_audio,
    inputs=[mic_input, upload_input],
    outputs="text",
)
demo.launch()