Spaces:

thak123
/

Whisper-Konkani

Running

App Files Files Community

thak123 committed 11 days ago

Commit

8a1e498

•

1 Parent(s): 7fb921e

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -66

app.py CHANGED Viewed

@@ -1,75 +1,105 @@
-from transformers import WhisperTokenizer
-import os
-tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small") #, language="marathi", task="transcribe"
-from transformers import pipeline
-import gradio as gr
-import torch
-pipe = pipeline(model="thak123/gom-stt-v3", #"thak123/whisper-small-LDC-V1", #"thak123/whisper-small-gom",
-                task="automatic-speech-recognition", tokenizer= tokenizer)  # change to "your-username/the-name-you-picked"
-# pipe.model.config.forced_decoder_ids = (
-#         pipe.tokenizer.get_decoder_prompt_ids(
-#             language="marathi", task="transcribe"
-#         )
 #     )
-def transcribe_speech(filepath):
-    output = pipe(
-        filepath,
-        max_new_tokens=256,
-        generate_kwargs={
-            "task": "transcribe",
-            "language": "konkani",
-        },  # update with the language you've fine-tuned on
-        chunk_length_s=30,
-        batch_size=8,
-        padding=True
-    )
-    return output["text"]
-demo = gr.Blocks()
-mic_transcribe = gr.Interface(
-    fn=transcribe_speech,
-    inputs=gr.Audio(sources="microphone", type="filepath"),
-    outputs=gr.components.Textbox(),
-)
-file_transcribe = gr.Interface(
-    fn=transcribe_speech,
-    inputs=gr.Audio(sources="upload", type="filepath"),
-    outputs=gr.components.Textbox(),
-)
-with demo:
-    gr.TabbedInterface(
-        [mic_transcribe, file_transcribe],
-        ["Transcribe Microphone", "Transcribe Audio File"],
-    )
-demo.launch(debug=True)
-# def transcribe(audio):
-#     # text = pipe(audio)["text"]
-#     # pipe(audio)
-#     text = pipe(audio)
-#     print("op",text)
-#     return text#pipe(audio) #text
-# iface = gr.Interface(
-#     fn=transcribe,
-#     inputs=[gr.Audio(sources=["microphone", "upload"])],
-#     outputs="text",
-#     examples=[
-#         [os.path.join(os.path.dirname("."),"audio/chalyaami.mp3")],
-#         [os.path.join(os.path.dirname("."),"audio/ekdonteen.flac")],
-#         [os.path.join(os.path.dirname("."),"audio/heyatachadjaale.mp3")],
-#     ],
-#     title="Whisper Konkani",
-#     description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
 # )
-# iface.launch()

+# from transformers import WhisperTokenizer
+# import os
+# tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small") #, language="marathi", task="transcribe"
+# from transformers import pipeline
+# import gradio as gr
+# import torch
+# pipe = pipeline(model="thak123/gom-stt-v3", #"thak123/whisper-small-LDC-V1", #"thak123/whisper-small-gom",
+#                 task="automatic-speech-recognition", tokenizer= tokenizer)  # change to "your-username/the-name-you-picked"
+# # pipe.model.config.forced_decoder_ids = (
+# #         pipe.tokenizer.get_decoder_prompt_ids(
+# #             language="marathi", task="transcribe"
+# #         )
+# #     )
+# def transcribe_speech(filepath):
+#     output = pipe(
+#         filepath,
+#         max_new_tokens=256,
+#         generate_kwargs={
+#             "task": "transcribe",
+#             "language": "konkani",
+#         },  # update with the language you've fine-tuned on
+#         chunk_length_s=30,
+#         batch_size=8,
+#         padding=True
 #     )
+#     return output["text"]
+# demo = gr.Blocks()
+# mic_transcribe = gr.Interface(
+#     fn=transcribe_speech,
+#     inputs=gr.Audio(sources="microphone", type="filepath"),
+#     outputs=gr.components.Textbox(),
 # )
+# file_transcribe = gr.Interface(
+#     fn=transcribe_speech,
+#     inputs=gr.Audio(sources="upload", type="filepath"),
+#     outputs=gr.components.Textbox(),
+# )
+# with demo:
+#     gr.TabbedInterface(
+#         [mic_transcribe, file_transcribe],
+#         ["Transcribe Microphone", "Transcribe Audio File"],
+#     )
+# demo.launch(debug=True)
+# # def transcribe(audio):
+# #     # text = pipe(audio)["text"]
+# #     # pipe(audio)
+# #     text = pipe(audio)
+# #     print("op",text)
+# #     return text#pipe(audio) #text
+# # iface = gr.Interface(
+# #     fn=transcribe,
+# #     inputs=[gr.Audio(sources=["microphone", "upload"])],
+# #     outputs="text",
+# #     examples=[
+# #         [os.path.join(os.path.dirname("."),"audio/chalyaami.mp3")],
+# #         [os.path.join(os.path.dirname("."),"audio/ekdonteen.flac")],
+# #         [os.path.join(os.path.dirname("."),"audio/heyatachadjaale.mp3")],
+# #     ],
+# #     title="Whisper Konkani",
+# #     description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
+# # )
+# # iface.launch()
+from transformers import WhisperTokenizer, pipeline
+import gradio as gr
+import os
+# Whisper tokenizer loaded at import time from the base "openai/whisper-small"
+# checkpoint, configured for the "transcribe" task with language="marathi".
+# NOTE(review): the app is titled "Whisper Konkani" yet the tokenizer language
+# is Marathi; presumably this matches how the model was fine-tuned. Confirm.
+tokenizer = WhisperTokenizer.from_pretrained("openai/whisper-small", language="marathi", task="transcribe")
+# Speech-recognition pipeline for the "thak123/gom-stt-v3" model, built once at
+# import time with the tokenizer above passed in explicitly.
+pipe = pipeline(model="thak123/gom-stt-v3", task="automatic-speech-recognition", tokenizer=tokenizer)
+def transcribe(audio):
+    """Transcribe one audio input with the module-level ASR pipeline.
+
+    Args:
+        audio: The value Gradio passes from the ``gr.Audio`` input. It is
+            forwarded to ``pipe`` unchanged. ``None`` means nothing was
+            recorded or uploaded.
+
+    Returns:
+        The recognised text, or an empty string when no audio was given.
+    """
+    # Gradio hands the callback None when the form is submitted without audio.
+    if audio is None:
+        return ""
+    result = pipe(audio)
+    # For a single input the pipeline returns one dict ({"text": ...}); only a
+    # list of inputs yields a list, so unconditional result[0] raised KeyError.
+    if isinstance(result, list):
+        result = result[0]
+    text = result["text"]
+    print("op", text)
+    return text
+# Single-page UI: one audio input (microphone or upload) mapped to a text box.
+iface = gr.Interface(
+    fn=transcribe,
+    # type="filepath" gives transcribe() a path to the audio file, which the
+    # pipeline decodes itself. The default type="numpy" would pass a
+    # (sample_rate, ndarray) tuple that the pipeline does not accept.
+    inputs=[gr.Audio(sources=["microphone", "upload"], type="filepath")],
+    outputs="text",
+    # os.path.dirname(".") is "", so these paths are relative to the working
+    # directory the app is started from.
+    examples=[
+        [os.path.join(os.path.dirname("."), "audio/chalyaami.mp3")],
+        [os.path.join(os.path.dirname("."), "audio/ekdonteen.flac")],
+        [os.path.join(os.path.dirname("."), "audio/heyatachadjaale.mp3")],
+    ],
+    title="Whisper Konkani",
+    description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
+)
+iface.launch()