Spaces:
Sleeping
Sleeping
from transformers import pipeline, WhisperModel | |
import gradio as gr | |
import pandas as pd | |
import string | |
# Speech-recognition pipeline, created once at import time.
# return_timestamps="word" is requested because audio2segments reads a
# per-chunk "timestamp" for every transcribed word.
pipe = pipeline(
    model="matteocirca/whisper-small-it-2",
    return_timestamps="word",
)

# Module-level cache of the latest transcription: audio2segments stores the
# pipeline result in `segments` and the audio it belongs to in `current_audio`.
segments = {}
current_audio = None
# Built once: deletes every ASCII punctuation character in str.translate.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def _normalize_word(text):
    """Return *text* lower-cased, with ASCII punctuation and spaces removed."""
    return text.translate(_PUNCTUATION_TABLE).replace(" ", "").lower()


def _render_occurrences(timestamps):
    """Render (start, end) pairs as an HTML table, one zero-based row each."""
    rows = "".join(
        f"<tr><td>{index}</td><td>{start}</td><td>{end}</td></tr>"
        for index, (start, end) in enumerate(timestamps)
    )
    header = (
        "<table><thead><tr><th>Occurrence n°</th><th>Start</th><th>End</th>"
        "</tr></thead><tbody>"
    )
    return header + rows + " </tbody></table>"


def audio2segments(audio, word):
    """Transcribe *audio* and list the timestamps at which *word* is spoken.

    The transcription is cached in the module-level ``segments`` /
    ``current_audio`` pair, so repeated searches on the same audio do not
    run the pipeline again.

    Args:
        audio: Value identifying the audio to transcribe (the interface
            passes a file path), or ``None`` when no audio was provided.
        word: Word to look for. It is compared case-insensitively and
            ignoring ASCII punctuation and spaces, exactly like the
            transcribed chunks.

    Returns:
        A ``(text, html)`` tuple: the full transcription (``""`` when there
        is no audio) and either an HTML table of occurrences or an HTML
        message explaining what is missing.
    """
    global segments, current_audio

    no_word_html = "<html><h1>No Word inserted!</h1></html>"

    if audio is None:
        # Nothing to transcribe: never hand None to the pipeline.
        if not word:
            return "", no_word_html
        return "", "<html><h1>No Audio inserted!</h1></html>"

    if audio != current_audio:
        # Update the cache key only after the pipeline succeeded, so a failed
        # transcription is retried on the next call.
        segments = pipe(audio)
        current_audio = audio

    # Normalize the query the same way as the chunks; otherwise "Ciao" or
    # "ciao " could never match the normalized chunk text.
    target = _normalize_word(word) if word else ""
    if not target:
        return segments["text"], no_word_html

    matches = [
        chunk["timestamp"]
        for chunk in segments["chunks"]
        if _normalize_word(chunk["text"]) == target
    ]
    return segments["text"], _render_occurrences(matches)
def clear():
    """Drop the cached transcription so the next request transcribes again.

    Both module-level cache variables are reset together: clearing only
    ``segments`` would leave ``current_audio`` pointing at audio whose
    transcription no longer exists.
    """
    # Without ``global`` the assignments would only bind function locals and
    # leave the module-level cache untouched.
    global segments, current_audio
    segments = {}
    current_audio = None
# Audio can be uploaded or recorded; type="filepath" makes the component pass
# a path to audio2segments as its first argument.
audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath")

# Inputs: the audio plus a free-text word to search for.
# Outputs: the transcription as text and the occurrences as HTML.
iface = gr.Interface(
    fn=audio2segments,
    inputs=[audio_input, "text"],
    outputs=["text", "html"],
    title="Whisper Small Italian",
    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
)

# Start serving the interface as soon as the script runs.
iface.launch()