from transformers import pipeline
import gradio as gr

asr = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-medium",
)

# Force Whisper to transcribe in Spanish instead of auto-detecting the language.
asr.model.config.forced_decoder_ids = asr.tokenizer.get_decoder_prompt_ids(
    language="spanish", task="transcribe"
)

demo = gr.Blocks()


def transcribe_long_form(filepath):
    if filepath is None:
        gr.Warning("No audio found, please retry.")
        return ""
    # Split the audio into 30-second chunks so recordings longer than
    # Whisper's context window can be transcribed, batching 8 chunks at a time.
    output = asr(
        filepath,
        max_new_tokens=256,
        chunk_length_s=30,
        batch_size=8,
    )
    return output["text"]


ner = pipeline("ner", model="mrm8488/bert-spanish-cased-finetuned-ner")


def get_ner(input_text):
    if input_text is None:
        gr.Warning("No transcription found, please retry.")
        return {"text": "", "entities": []}
    # gr.HighlightedText accepts the token-classification pipeline output
    # directly as its "entities" list.
    output = ner(input_text)
    return {"text": input_text, "entities": output}


def main(filepath):
    transcription = transcribe_long_form(filepath)
    # Named `entities` rather than `ner` to avoid shadowing the NER pipeline.
    entities = get_ner(transcription)
    return transcription, entities


mic_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription", lines=3),
        gr.HighlightedText(label="Text with entities"),
    ],
    title="Transcribe audio from the microphone",
    description="Transcription of audio recorded with the microphone.",
    allow_flagging="never",
)

file_transcribe = gr.Interface(
    fn=main,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription", lines=3),
        gr.HighlightedText(label="Text with entities"),
    ],
    title="Transcribe audio from a file",
    description="Transcription from an uploaded audio file.",
    allow_flagging="never",
)

with demo:
    gr.TabbedInterface(
        [mic_transcribe, file_transcribe],
        ["Transcribe Microphone", "Transcribe Audio File"],
    )

demo.launch()
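
# Note: recent transformers releases deprecate setting
# `model.config.forced_decoder_ids` directly; the language and task can
# instead be passed per call through `generate_kwargs`. A minimal sketch of
# that alternative (assuming a recent transformers version; not used above):
#
#     output = asr(
#         filepath,
#         max_new_tokens=256,
#         chunk_length_s=30,
#         batch_size=8,
#         generate_kwargs={"language": "spanish", "task": "transcribe"},
#     )
#
# Similarly, building the NER pipeline with `aggregation_strategy="simple"`
# merges word-piece tokens into whole entities, which tends to render more
# cleanly in gr.HighlightedText (an optional tweak, not used above):
#
#     ner = pipeline(
#         "ner",
#         model="mrm8488/bert-spanish-cased-finetuned-ner",
#         aggregation_strategy="simple",
#     )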