"""Gradio demo: Tibetan speech recognition with a fine-tuned Whisper small model.

The ASR pipeline's text output is passed through pyewts ``toUnicode``
(presumably the model emits EWTS/Wylie transliteration -- confirm against the
checkpoint) and shown next to the audio clip that was transcribed.
"""

from typing import Optional, Tuple

import gradio as gr
import pyewts
import torch
from transformers import pipeline

converter = pyewts.pyewts()

# Disabled post-processing step, kept for reference (see the commented-out
# call inside `transcribe`).
#
# def remove_repeated_words(text):
#     # Tokenize the input text into words
#     words = text.split()
#     # Create a dictionary to count word occurrences
#     word_count = {}
#     # Create a list to store the final words
#     new_words = []
#     for word in words:
#         # Check if the word is in the dictionary
#         if word in word_count:
#             # If it has occurred once before, add it to the list with a count of 2
#             if word_count[word] == 1:
#                 new_words.append(word)
#                 word_count[word] = 2
#         else:
#             # If it has not occurred before, add it to the dictionary with a count of 1
#             word_count[word] = 1
#             new_words.append(word)
#     result = ' '.join(new_words)
#     return result

# Previously used checkpoints:
#   "openpecha/whisper-small"
#   "TenzinGayche/whisper-small-3"
MODEL_ID = "spsither/whipser-small-r2"

# Prefer the GPU, but fall back to CPU so the demo still starts on hosts
# without CUDA instead of failing at import time.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

pipe = pipeline(model=MODEL_ID, device=DEVICE)


def transcribe(
    microphone: Optional[str], upload: Optional[str]
) -> Tuple[str, str]:
    """Transcribe one audio clip and return the text plus the clip used.

    Args:
        microphone: File path of the microphone recording, or a falsy value
            when nothing was recorded. Takes precedence over ``upload``.
        upload: File path of the uploaded audio file, or a falsy value when
            nothing was uploaded.

    Returns:
        A ``(text, audio_path)`` tuple: the pipeline output converted with
        pyewts ``toUnicode``, and the path of the clip that was transcribed.

    Raises:
        gr.Error: If neither a recording nor an upload was provided.
    """
    audio = microphone or upload
    if not audio:
        # Without this guard the pipeline is called with None and the user
        # only sees an opaque internal error.
        raise gr.Error("Please record or upload an audio clip first.")

    text = pipe(audio)["text"]
    # text = remove_repeated_words(text)
    return converter.toUnicode(text), audio


# Two audio inputs (microphone and file upload) feed `transcribe`; the
# outputs are the transcription and a player for the clip that was used.
iface = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Audio(source="upload", type="filepath"),
    ],
    outputs=["text", "audio"],
    title="Whisper Small Tibetan",
    description="Realtime demo for Tibetan speech recognition using a fine-tuned Whisper small model. Feedbacks: https://forms.gle/psbZnXGeBWXptkvs9",
)

iface.launch()