Spaces:

akki2825
/

accents_unplugged_deepspeech_v93

Sleeping

Update run.py

94c1047 7 months ago

No virus

1.36 kB

	import os
	import urllib.request
	import wave

	import gradio as gr
	import numpy as np
	from deepspeech import Model

	# Release assets: file names double as the local paths they are saved under.
	model_file_path = "deepspeech-0.9.3-models.pbmm"
	lm_file_path = "deepspeech-0.9.3-models.scorer"
	url = "https://github.com/mozilla/DeepSpeech/releases/download/v0.9.3/"

	# Fetch each asset only when it is not already on disk, so a restart does
	# not re-download it. The download goes to a ".part" file that is moved into
	# place afterwards; an interrupted transfer therefore never leaves a
	# truncated file under the final name.
	for _asset in (model_file_path, lm_file_path):
	    if not os.path.exists(_asset):
	        _partial = _asset + ".part"
	        urllib.request.urlretrieve(url + _asset, filename=_partial)
	        os.replace(_partial, _asset)

	# Decoder settings applied to the model below.
	beam_width = 100
	lm_alpha = 0.93
	lm_beta = 1.18

	# Load the acoustic model, attach the external scorer, then apply the
	# scorer weights and beam width defined above.
	model = Model(model_file_path)
	model.enableExternalScorer(lm_file_path)
	model.setScorerAlphaBeta(lm_alpha, lm_beta)
	model.setBeamWidth(beam_width)


	def reformat_freq(sr, y):
	    """Bring audio to 16 kHz, the only rate DeepSpeech supports.

	    Args:
	        sr: Sample rate of ``y`` in Hz; must be 16000 or 48000.
	        y: Array-like of audio samples.

	    Returns:
	        A ``(sample_rate, samples)`` tuple. 16 kHz input is returned
	        unchanged. 48 kHz input is peak-normalised to the int16 range and
	        reduced to 16 kHz by averaging each run of three consecutive
	        samples; the result is an ``int16`` array.

	    Raises:
	        ValueError: If ``sr`` is neither 16000 nor 48000.
	    """
	    if sr not in (48000, 16000):
	        raise ValueError("Unsupported rate", sr)
	    if sr == 48000:
	        # Work in float64 so that abs(-32768) cannot wrap around in int16.
	        samples = np.asarray(y, dtype=np.float64).reshape(-1)
	        # Normalise by the peak *magnitude* (floored at 1): dividing by the
	        # largest signed value overflows int16 whenever the negative
	        # excursion is larger than the positive one.
	        peak = max(float(np.abs(samples).max()), 1.0) if samples.size else 1.0
	        # Only whole groups of three can be averaged; up to two trailing
	        # samples are dropped instead of making the reshape fail.
	        groups = samples.size // 3
	        y = (
	            (samples[: groups * 3] / peak * 32767)
	            .reshape((groups, 3))
	            .mean(axis=1)
	            .astype("int16")
	        )
	        sr = 16000
	    return sr, y


	def transcribe(audio_file):
	    """Transcribe a 16-bit PCM WAV file with the module-level DeepSpeech model.

	    The interface passes a file path, but ``Model.stt`` takes a buffer of
	    16-bit mono samples, so the file is decoded here before inference.

	    Args:
	        audio_file: Path to a WAV file, or ``None`` when nothing was uploaded.

	    Returns:
	        The recognised text, or an empty string when ``audio_file`` is ``None``.

	    Raises:
	        wave.Error: If the file is not a WAV file the ``wave`` module can read.
	        ValueError: If the samples are not 16-bit, or the sample rate is one
	            that ``reformat_freq`` rejects.
	    """
	    if audio_file is None:
	        return ""

	    with wave.open(audio_file, "rb") as wav:
	        sample_width = wav.getsampwidth()
	        if sample_width != 2:
	            raise ValueError("Unsupported sample width", sample_width)
	        channels = wav.getnchannels()
	        sr = wav.getframerate()
	        frames = wav.readframes(wav.getnframes())

	    # WAV PCM data is little-endian; convert to the native int16 layout.
	    samples = np.frombuffer(frames, dtype="<i2").astype(np.int16)
	    if channels > 1:
	        # Frames are interleaved per channel; average them down to mono.
	        samples = samples.reshape((-1, channels)).mean(axis=1).astype(np.int16)

	    _, samples = reformat_freq(sr, samples)
	    return model.stt(samples)


	# Web UI: one uploaded audio clip in (handed to `transcribe` as a file
	# path), one transcript text box out.
	demo = gr.Interface(
	    fn=transcribe,
	    inputs=gr.Audio(source="upload", type="filepath", label="Upload Audio File"),
	    outputs=gr.Textbox(label="Transcript"),
	)

	# Start the app only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    demo.launch()