Spaces:

matteocirca
/

ASR-app-pro

Sleeping

ASR-app-pro / app.py

Update app

1dceb84 over 1 year ago

1.78 kB

	from transformers import pipeline, WhisperModel
	import gradio as gr
	import pandas as pd
	import string

	# Pipeline built from the fine-tuned Whisper checkpoint; word-level
	# timestamps are requested so single words can be located in the audio.
	pipe = pipeline(
	    model="matteocirca/whisper-small-it-2",
	    return_timestamps="word",
	)

	# Module-level cache used by audio2segments: the last pipeline output and
	# the audio it was computed from.
	segments = {}
	current_audio = None

	def audio2segments(audio, word):
	    """Transcribe ``audio`` and list the timestamps at which ``word`` occurs.

	    The pipeline output is cached in the module-level ``segments`` /
	    ``current_audio`` pair, so looking up several words in the same
	    recording runs the model only once.

	    Args:
	        audio: Value of the audio input (a file path), or ``None`` when no
	            audio was provided.
	        word: Word to search for. Matching ignores case, spaces and ASCII
	            punctuation, both in the query and in the transcribed chunks.

	    Returns:
	        A ``(transcript, html)`` tuple: the transcription text (empty when
	        there is no audio) and either an HTML table with one row per
	        occurrence or an HTML notice explaining what is missing.
	    """
	    global segments, current_audio

	    strip_punctuation = str.maketrans("", "", string.punctuation)

	    def normalize(text):
	        # One normalisation shared by the query and every chunk, so that
	        # "Ciao", "ciao!" and " ciao," all compare equal.
	        return text.translate(strip_punctuation).replace(" ", "").lower()

	    query = normalize(word) if word else ""

	    if audio is None:
	        # Nothing to transcribe: never hand None to the pipeline.
	        if not query:
	            return "", "<html><h1>No Word inserted!</h1></html>"
	        return "", "<html><h1>No Audio inserted!</h1></html>"

	    if audio != current_audio:
	        # New recording: run the model and remember the result.
	        segments = pipe(audio)
	        current_audio = audio

	    if not query:
	        return segments["text"], "<html><h1>No Word inserted!</h1></html>"

	    matches = [
	        chunk["timestamp"]
	        for chunk in segments["chunks"]
	        if normalize(chunk["text"]) == query
	    ]
	    rows = "".join(
	        f"<tr><td>{i}</td><td>{start}</td><td>{end}</td></tr>"
	        for i, (start, end) in enumerate(matches)
	    )
	    res = (
	        "<table><thead><tr><th>Occurrence n°</th><th>Start</th><th>End</th></tr></thead><tbody>"
	        + rows
	        + " </tbody></table>"
	    )
	    return segments["text"], res

	def clear():
	    """Drop the cached transcription.

	    Resets both module-level cache variables. ``current_audio`` must be
	    reset together with ``segments``: otherwise a later request for the
	    same audio would skip transcription and read from the empty dict.
	    """
	    # Without the global declaration the assignments would only bind locals.
	    global segments, current_audio
	    segments = {}
	    current_audio = None


	# Gradio UI: an audio input (upload or microphone, passed as a file path)
	# and a text box feed audio2segments; its two results are shown as plain
	# text and as HTML.
	iface = gr.Interface(
	    title="Whisper Small Italian",
	    description="Realtime demo for Italian speech recognition using a fine-tuned Whisper small model.",
	    fn=audio2segments,
	    inputs=[
	        gr.Audio(sources=["upload", "microphone"], type="filepath"),
	        "text",
	    ],
	    outputs=["text", "html"],
	)

	# Start the web app.
	iface.launch()