import gradio as gr
import pyewts
import torch
from transformers import pipeline
# Shared pyewts converter instance; transcribe() passes the model output
# through its toUnicode() method before returning it to the UI.
# NOTE(review): presumably this maps EWTS/Wylie transliteration to Tibetan
# Unicode script -- confirm against the model's output format.
converter = pyewts.pyewts()
# def remove_repeated_words(text):
#     # Tokenize the input text into words
#     words = text.split()

#     # Create a dictionary to count word occurrences
#     word_count = {}

#     # Create a list to store the final words
#     new_words = []

#     for word in words:
#         # Check if the word is in the dictionary
#         if word in word_count:
#             # If it has occurred once before, add it to the list with a count of 2
#             if word_count[word] == 1:
#                 new_words.append(word)
#                 word_count[word] = 2
#         else:
#             # If it has not occurred before, add it to the dictionary with a count of 1
#             word_count[word] = 1
#             new_words.append(word)

#     result = ' '.join(new_words)
#     return result

# Alternative checkpoints (currently disabled):
# pipe = pipeline(model="openpecha/whisper-small",device='cuda')
# pipe = pipeline(model="TenzinGayche/whisper-small-3",device='cuda')

# Speech-recognition pipeline used by transcribe(). Run on the GPU when one is
# available; otherwise fall back to the CPU so the demo still starts on hosts
# without CUDA instead of failing at import time.
pipe = pipeline(
    model="spsither/whipser-small-r2",
    device="cuda" if torch.cuda.is_available() else "cpu",
)

def transcribe(microphone, upload):
    """Transcribe an audio clip and return the converted text with the clip.

    Args:
        microphone: File path of the microphone recording, or a falsy value
            (e.g. ``None``) when nothing was recorded.
        upload: File path of the uploaded audio file, or a falsy value when
            nothing was uploaded.

    Returns:
        A ``(text, audio)`` tuple: the pipeline's transcription after
        ``converter.toUnicode`` and the path of the clip that was transcribed.
        The microphone recording takes precedence when both inputs are given.

    Raises:
        gr.Error: If neither a recording nor an upload was provided.
    """
    audio = microphone or upload
    if not audio:
        # Without this guard the pipeline would be called with None and fail
        # with an opaque internal error instead of a message the user can act on.
        raise gr.Error("Please record or upload an audio clip before submitting.")

    text = pipe(audio)["text"]
    # text = remove_repeated_words(text)
    return converter.toUnicode(text), audio

# Gradio UI for the transcription demo.

iface = gr.Interface(
    fn=transcribe,
    # Two alternative audio sources; each one reaches `transcribe` as a file path.
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Audio(source="upload", type="filepath"),
    ],
    # The transcription text, plus the audio clip that was transcribed.
    outputs=["text", "audio"],
    title="Whisper Small Tibetan",
    # The description names the "small" model so it agrees with the title and
    # with the checkpoint that is actually loaded.
    description="Realtime demo for Tibetan speech recognition using a fine-tuned Whisper small model. Feedbacks: https://forms.gle/psbZnXGeBWXptkvs9",
)

# Start the web server for the demo.
iface.launch()