Spaces:

thak123
/

Whisper-Konkani

Running

App Files Files Community

Whisper-Konkani / app.py

thak123

Update app.py

7fb921e verified 11 days ago

raw

history blame

2.18 kB

	from transformers import WhisperTokenizer
	import os
	# Tokenizer is loaded from the base "openai/whisper-small" checkpoint.
	# No language/task is passed here; the earlier arguments
	# (language="marathi", task="transcribe") remain disabled.
	tokenizer = WhisperTokenizer.from_pretrained(
	    "openai/whisper-small",
	)

	from transformers import pipeline
	import gradio as gr
	import torch

	# Speech-recognition pipeline used by the transcription callback.
	# Checkpoints tried previously: "thak123/whisper-small-LDC-V1",
	# "thak123/whisper-small-gom". Replace the model id with
	# "your-username/the-name-you-picked" to point at a different checkpoint.
	pipe = pipeline(
	    task="automatic-speech-recognition",
	    model="thak123/gom-stt-v3",
	    tokenizer=tokenizer,
	)

	# pipe.model.config.forced_decoder_ids = (
	# pipe.tokenizer.get_decoder_prompt_ids(
	# language="marathi", task="transcribe"
	# )
	# )

	def transcribe_speech(filepath):
	    """Transcribe an audio file to text using the module-level ``pipe``.

	    Args:
	        filepath: Path of the audio file to transcribe. The audio inputs
	            wired to this callback use ``type="filepath"``. A falsy value
	            (``None`` or ``""``) is treated as "no audio supplied".

	    Returns:
	        The ``"text"`` entry of the pipeline output, or an empty string
	        when no audio was supplied.
	    """
	    # The UI can invoke this callback without any recording/upload; return
	    # early instead of handing an empty input to the pipeline.
	    if not filepath:
	        return ""

	    output = pipe(
	        filepath,
	        max_new_tokens=256,
	        generate_kwargs={
	            "task": "transcribe",
	            "language": "konkani",
	        },  # update with the language you've fine-tuned on
	        chunk_length_s=30,
	        batch_size=8,
	        padding=True,
	    )
	    return output["text"]


	def _build_interface(source):
	    """Return a transcription ``gr.Interface`` fed by the given audio source."""
	    return gr.Interface(
	        fn=transcribe_speech,
	        inputs=gr.Audio(sources=source, type="filepath"),
	        outputs=gr.components.Textbox(),
	    )


	# Top-level container for the whole app.
	demo = gr.Blocks()

	# One interface per input method; both call the same transcription function.
	mic_transcribe = _build_interface("microphone")
	file_transcribe = _build_interface("upload")

	# Expose the two interfaces as tabs inside the container.
	with demo:
	    gr.TabbedInterface(
	        interface_list=[mic_transcribe, file_transcribe],
	        tab_names=["Transcribe Microphone", "Transcribe Audio File"],
	    )

	demo.launch(debug=True)

	# def transcribe(audio):
	# # text = pipe(audio)["text"]
	# # pipe(audio)
	# text = pipe(audio)
	# print("op",text)
	# return text#pipe(audio) #text

	# iface = gr.Interface(
	# fn=transcribe,
	# inputs=[gr.Audio(sources=["microphone", "upload"])],
	# outputs="text",
	# examples=[
	# [os.path.join(os.path.dirname("."),"audio/chalyaami.mp3")],
	# [os.path.join(os.path.dirname("."),"audio/ekdonteen.flac")],
	# [os.path.join(os.path.dirname("."),"audio/heyatachadjaale.mp3")],
	# ],
	# title="Whisper Konkani",
	# description="Realtime demo for Konkani speech recognition using a fine-tuned Whisper small model.",
	# )


	# iface.launch()