# app.py — Wav2Vec2 speech transcription + fluency-scoring demo (Gradio)
import gradio as gr
import numpy as np
import torch
from scipy.io import wavfile
from scipy.signal import resample
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
# Load the pretrained Wav2Vec2 CTC model and its processor
# (feature extractor + tokenizer). Downloads weights on first run.
processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-large-960h-lv60-self")
# Transcribe audio with Wav2Vec2 and score the fluency of the result.
def evaluate_fluency(audio):
    """Transcribe *audio* and return ``(transcription, fluency_score)``.

    Accepts any payload Gradio's Audio component can deliver: a filepath
    string, a ``(sample_rate, samples)`` tuple, or a bare sample array
    assumed to already be at 16 kHz. The original code passed the raw
    Gradio payload straight to the processor, which crashes on real
    uploads; here the input is first normalized to mono float32 @ 16 kHz.
    """
    # --- Normalize the input to a mono float32 waveform at 16 kHz ---
    if isinstance(audio, str):
        # type="filepath": read the WAV from disk.
        sample_rate, samples = wavfile.read(audio)
    elif isinstance(audio, tuple):
        # type="numpy": Gradio hands back (sample_rate, ndarray).
        sample_rate, samples = audio
    else:
        # Bare array: assume it is already at the model's sample rate.
        sample_rate, samples = 16_000, audio
    samples = np.asarray(samples, dtype=np.float32)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)  # downmix multi-channel to mono
    peak = float(np.abs(samples).max()) if samples.size else 0.0
    if peak > 1.0:
        # Integer-PCM range detected: scale into [-1, 1] as the model expects.
        samples = samples / 32768.0
    if sample_rate != 16_000:
        # Wav2Vec2 was trained on 16 kHz audio; resample anything else.
        samples = resample(samples, int(len(samples) * 16_000 / sample_rate))

    # --- Transcribe ---
    inputs = processor(samples, return_tensors="pt", sampling_rate=16_000).input_values
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        logits = model(inputs).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]

    # Score fluency with the custom metric defined below.
    fluency_score = my_custom_fluency_metric(transcription)
    return transcription, fluency_score
# Placeholder fluency metric for the transcribed text.
def my_custom_fluency_metric(transcription):
    """Return a crude fluency score for *transcription*.

    Stand-in implementation: the score is simply the number of
    whitespace-separated words in the text. Swap in a real NLP metric
    (ROUGE, BLEU, LM perplexity, ...) when one is available.
    """
    return len(transcription.split())
# Interfaz Gradio para la aplicaci贸n
audio_input = gr.inputs.Audio(source="upload", type="file")
output_text = gr.outputs.Textbox(label="Transcription")
output_score = gr.outputs.Textbox(label="Fluency Score")
gr.Interface(
fn=evaluate_fluency,
inputs=audio_input,
outputs=[output_text, output_score],
title="Audio Transcription & Fluency Evaluation",
description="Upload an audio file and evaluate transcription & fluency of the generated text."
).launch()