Spaces:

hugoiabd
/

prueba-lara-equipo-3

Runtime error

File size: 1,571 Bytes

64db4c7
 
 
 
 
 
 
 
9c94255
64db4c7
28bbb3d
64db4c7
 
 
 
 
 
 
28bbb3d
b63a826
 
28bbb3d
b63a826
 
9c94255
b63a826
64db4c7
 
 
b63a826
ff97a84
64db4c7
 
 
 
 
 
28bbb3d
b63a826
28bbb3d
b63a826
9c94255
b63a826
64db4c7
 
28bbb3d

import gradio as gr
import torch
from transformers import pipeline
import numpy as np
import time

# Load the three speech-recognition pipelines once, at import time, so that
# every call to the request handler reuses the same model instances.
# The generator is consumed in order, so models load in the order listed.
pipe_base, pipe_small, pipe_base_1600 = (
    pipeline("automatic-speech-recognition", model=model_id)
    for model_id in (
        "aitor-medrano/lara-base-pushed",
        "aitor-medrano/whisper-small-lara",
        "aitor-medrano/whisper-base-lara-1600",
    )
)

def _normalize_audio(samples):
    """Return *samples* as a mono float32 array scaled to a peak of 1."""
    samples = np.asarray(samples)
    if samples.ndim > 1:
        # Assumes multi-channel audio is laid out as (samples, channels),
        # which is what gr.Audio is documented to produce -- TODO confirm.
        samples = samples.mean(axis=1)
    # Convert the sample array to 32-bit float (astype returns a copy, so the
    # caller's array is never modified by the in-place division below).
    samples = samples.astype(np.float32)
    peak = np.max(np.abs(samples))
    # A silent clip has peak 0; dividing would fill the array with NaN/inf.
    if peak > 0:
        samples /= peak
    return samples


def _timed_transcription(pipe, prefix, sr, samples):
    """Run *pipe* on the audio; return (prefix + text, elapsed seconds)."""
    start = time.perf_counter()
    # Build a fresh dict per call in case the pipeline mutates its input.
    text = pipe({"sampling_rate": sr, "raw": samples})["text"]
    return prefix + text, time.perf_counter() - start


def greet(grabacion):
    """Transcribe a recording with the three models and time each one.

    Args:
        grabacion: A ``(sampling_rate, samples)`` pair, or ``None`` when no
            audio was provided.

    Returns:
        A 7-tuple ``(text_base, secs_base, text_small, secs_small,
        text_base_1600, secs_base_1600, secs_total)``. Each text is prefixed
        with the model tag (``"base:"``, ``"small:"``, ``"base_1600:"``).
        Each per-model time covers only that model's inference; the total
        covers the whole call, preprocessing included. When there is no
        audio (``None`` or zero samples) every text is ``""`` and every
        time is ``0.0``.
    """
    inicio = time.perf_counter()
    sin_audio = ("", 0.0, "", 0.0, "", 0.0, 0.0)

    # Guard clauses: nothing to transcribe, so do not touch the models.
    if grabacion is None:
        return sin_audio
    sr, y = grabacion
    if np.size(y) == 0:
        return sin_audio

    y = _normalize_audio(y)

    result_base, t_base = _timed_transcription(pipe_base, "base:", sr, y)
    result_small, t_small = _timed_transcription(pipe_small, "small:", sr, y)
    result_base_1600, t_1600 = _timed_transcription(
        pipe_base_1600, "base_1600:", sr, y
    )

    total = time.perf_counter() - inicio
    return (
        result_base, t_base,
        result_small, t_small,
        result_base_1600, t_1600,
        total,
    )

# Web UI: one audio input, and one output widget per value returned by
# `greet`, in the same order (text + time per model, then the total time).
demo = gr.Interface(
    fn=greet,
    inputs=[
        gr.Audio(),
    ],
    outputs=[
        # Each entry needs a trailing comma: without them adjacent calls
        # are a SyntaxError and the whole file fails to load.
        gr.Text(label="Salida (Base)"),
        gr.Number(label="Tiempo (Base)"),
        gr.Text(label="Salida (Small)"),
        gr.Number(label="Tiempo (Small)"),
        gr.Text(label="Salida (Base 1600)"),
        gr.Number(label="Tiempo (1600)"),
        gr.Number(label="Tiempo"),
    ],
)
demo.launch()