spsither's picture
Update app.py
b7f7313
import gradio as gr
import pyewts
import torch
from transformers import pipeline
# Shared pyewts converter instance; transcribe() passes the model's text
# output through its toUnicode() method.
# NOTE(review): presumably this maps Wylie (EWTS) transliteration to Tibetan
# Unicode script — confirm against the pyewts documentation.
converter = pyewts.pyewts()
# def remove_repeated_words(text):
# # Tokenize the input text into words
# words = text.split()
# # Create a dictionary to count word occurrences
# word_count = {}
# # Create a list to store the final words
# new_words = []
# for word in words:
# # Check if the word is in the dictionary
# if word in word_count:
# # If it has occurred once before, add it to the list with a count of 2
# if word_count[word] == 1:
# new_words.append(word)
# word_count[word] = 2
# else:
# # If it has not occurred before, add it to the dictionary with a count of 1
# word_count[word] = 1
# new_words.append(word)
# result = ' '.join(new_words)
# return result
# Alternative checkpoints (disabled):
# pipe = pipeline(model="openpecha/whisper-small",device='cuda')
# pipe = pipeline(model="TenzinGayche/whisper-small-3",device='cuda')

# Prefer the GPU, but fall back to the CPU so the app still starts on hosts
# without CUDA instead of failing while the pipeline is being created.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
pipe = pipeline(model="spsither/whipser-small-r2", device=DEVICE)
def transcribe(microphone, upload):
    """Transcribe one audio clip and return its text converted to Unicode.

    Args:
        microphone: File path of the microphone recording, or a falsy value
            (e.g. ``None``) when nothing was recorded.
        upload: File path of the uploaded audio; used only when
            ``microphone`` is falsy.

    Returns:
        A ``(text, audio)`` tuple: the pipeline's ``"text"`` output passed
        through ``converter.toUnicode``, and the path of the clip that was
        actually transcribed.

    Raises:
        gr.Error: If neither input holds an audio clip.
    """
    # The microphone recording takes precedence over the uploaded file.
    audio = microphone or upload
    if not audio:
        # Surface a readable message in the UI rather than handing None to
        # the model and failing with an opaque pipeline error.
        raise gr.Error("Please record or upload an audio clip first.")

    text = pipe(audio)["text"]
    # text = remove_repeated_words(text)
    return converter.toUnicode(text), audio
# Gradio UI: two audio inputs (microphone recording and file upload), both
# delivered as file paths, feed transcribe(); its two return values are shown
# in a text component and an audio component.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Audio(source="upload", type="filepath"),
    ],
    outputs=["text", "audio"],
    title="Whisper Small Tibetan",
    # The description previously said "medium", contradicting both the title
    # and the whisper-small checkpoint that is actually loaded.
    description=(
        "Realtime demo for Tibetan speech recognition using a fine-tuned "
        "Whisper small model. Feedbacks: https://forms.gle/psbZnXGeBWXptkvs9"
    ),
)
iface.launch()