# TEDCAS offline speech-recognition demo (gradio + openai-whisper).
# NOTE(review): this header replaced scraped web-page chrome
# ("Spaces:" / "Runtime error") that was not part of the program.
from transformers import pipeline
import gradio as gr
import time
import whisper
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from datasets import load_dataset

# Earlier experiments kept for reference:
#p = pipeline("automatic-speech-recognition",model="jonatasgrosman/wav2vec2-large-xlsr-53-spanish")
#processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
#model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
#model.config.forced_decoder_ids = None

# Load the multilingual Whisper model once at import time.
# NOTE(review): "large" is ~2.9 GB and downloads on first run — presumably
# intentional for this Space; confirm the host has the RAM/disk for it.
model = whisper.load_model("large")
# load model and processor
#processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
#model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
#model.config.forced_decoder_ids = None
# load dummy dataset and read audio files
#ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
#sample = ds[0]["audio"]
#input_features = processor(sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt").input_features
# generate token ids
#predicted_ids = model.generate(input_features)
# decode token ids to text
#transcription = processor.batch_decode(predicted_ids, skip_special_tokens=False)
#['<|startoftranscript|><|en|><|transcribe|><|notimestamps|> Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.<|endoftext|>']
#transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
#[' Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.']
#def speech_to_text(tmp_filename, model_size):
#    model = whisper.load_model(model_size)
#    result = model.transcribe(tmp_filename)
#
#    return result["text"]
#gr.Interface(
#    fn=speech_to_text,
#    inputs=[
#        gr.Audio(source="microphone", type="filepath"),
#        gr.Dropdown(choices=["tiny", "base", "small", "medium", "large"]),
#    ],
#    outputs="text").launch()
def transcribe(language, audio, state=""):
    """Transcribe a recorded audio file with Whisper and accumulate text.

    Args:
        language: Dropdown selection ("Multi" or "Spanish").  "Multi"
            resets the accumulated transcript before transcribing.
        audio: Path to the recorded audio file (gradio ``type="filepath"``).
        state: Transcript accumulated across calls (gradio session state).

    Returns:
        A ``(display_text, new_state)`` pair; both are the full
        accumulated transcript so the textbox mirrors the state.
    """
    # Brief pause before reading the file — presumably to let gradio
    # finish flushing the microphone recording to disk; TODO confirm.
    time.sleep(1)
    if language == "Multi":
        # "Multi" mode starts a fresh transcript on every submission.
        state = ""
    # Bug fix: transcription previously ran only inside the "Multi"
    # branch, so selecting "Spanish" raised UnboundLocalError on `text`.
    # Whisper auto-detects the language, so one call serves both modes.
    result = model.transcribe(audio)
    text = result["text"]
    state += text + " "
    return state, state
# Assemble the gradio UI.
# NOTE(review): the original mixed the modern gr.Dropdown API with the
# deprecated gr.inputs.Audio namespace (removed in gradio 4.x) — likely
# the cause of the Space's runtime error.  gr.Audio is used consistently
# here; on gradio >= 4.0 `source=` must become `sources=["microphone"]`.
demo = gr.Interface(
    fn=transcribe,
    title="TEDCAS Offline Speech recognition",
    description="1)Select language 2)Click on 'record from microphone' and talk 3)Click on 'stop recording' 4)Click on submit 5)Before starting again, click on 'clear'",
    inputs=[
        gr.Dropdown(["Multi", "Spanish"], value="Multi"),
        gr.Audio(source="microphone", type="filepath"),
        "state",
    ],
    outputs=[
        "textbox",
        "state",
    ],
)

demo.launch()
# To require a login, pass credentials from the environment instead of
# hard-coding them in the source (the original comment embedded a real
# username/password pair), e.g.:
# demo.launch(auth=(os.environ["APP_USER"], os.environ["APP_PASS"]))