# Spaces: thak123 / Whisper-Konkani (Sleeping)
# File size: 2,182 Bytes

from transformers import WhisperTokenizer
import os
# Tokenizer loaded from the base "openai/whisper-small" checkpoint.
# No language/task is set here; a commented-out earlier variant passed
# language="marathi", task="transcribe".
tokenizer = WhisperTokenizer.from_pretrained(
    "openai/whisper-small",
)

from transformers import pipeline
import gradio as gr
import torch 

# Speech-recognition pipeline for the "thak123/gom-stt-v3" checkpoint, using the
# tokenizer loaded above.
# Other model ids noted here earlier: "thak123/whisper-small-LDC-V1",
# "thak123/whisper-small-gom".
# Change the model id to "your-username/the-name-you-picked" to use your own.
pipe = pipeline(
    task="automatic-speech-recognition",
    model="thak123/gom-stt-v3",
    tokenizer=tokenizer,
)

# pipe.model.config.forced_decoder_ids = (
#         pipe.tokenizer.get_decoder_prompt_ids(
#             language="marathi", task="transcribe"
#         )
#     )

def transcribe_speech(filepath):
    """Transcribe an audio file with the module-level ``pipe`` pipeline.

    Args:
        filepath: Path of the audio file to transcribe. May be ``None`` or
            empty when the caller has no audio to offer (presumably what the
            Gradio audio inputs pass when nothing was recorded or uploaded --
            confirm against the Gradio version in use).

    Returns:
        The ``"text"`` entry of the pipeline output, or an empty string when
        no audio path was given.
    """
    # Without a path there is nothing to decode; return early instead of
    # letting the pipeline fail on a missing input.
    if not filepath:
        return ""

    output = pipe(
        filepath,
        max_new_tokens=256,
        generate_kwargs={
            "task": "transcribe",
            # NOTE(review): confirm "konkani" is a language name the model's
            # generation config accepts; a commented-out earlier variant in
            # this file used "marathi".
            "language": "konkani",
        },  # update with the language you've fine-tuned on
        chunk_length_s=30,
        batch_size=8,
        # NOTE(review): confirm the installed speech-recognition pipeline
        # accepts a `padding` argument; kept unchanged from the original call.
        padding=True,
    )
    return output["text"]


def _make_interface(audio_source):
    """Build one interface that feeds audio from *audio_source* to the transcriber."""
    audio_input = gr.Audio(sources=audio_source, type="filepath")
    text_output = gr.components.Textbox()
    return gr.Interface(
        fn=transcribe_speech,
        inputs=audio_input,
        outputs=text_output,
    )


# Container for the tabbed UI; created before the individual interfaces.
demo = gr.Blocks()

# One interface per audio source, built outside the `demo` context.
mic_transcribe = _make_interface("microphone")
file_transcribe = _make_interface("upload")

# Place both interfaces into tabs inside the container.
with demo:
    gr.TabbedInterface(
        interface_list=[mic_transcribe, file_transcribe],
        tab_names=["Transcribe Microphone", "Transcribe Audio File"],
    )

# Start the app with debug mode enabled.
demo.launch(debug=True)

# def transcribe(audio):
#     # text = pipe(audio)["text"]
#     # pipe(audio)
#     text = pipe(audio)
#     print("op",text)
#     return text#pipe(audio) #text

# iface = gr.Interface(
#     fn=transcribe, 
#     inputs=[gr.Audio(sources=["microphone", "upload"])], 
#     outputs="text",
#     examples=[
#         [os.path.join(os.path.dirname("."),"audio/chalyaami.mp3")],
#         [os.path.join(os.path.dirname("."),"audio/ekdonteen.flac")],
#         [os.path.join(os.path.dirname("."),"audio/heyatachadjaale.mp3")],
#     ],
#     title="Whisper Konkani",
#     description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
# )


# iface.launch()