# CPU-only speech-to-text + translation demo.
# The commented-out lines below are an alternative transformers pipeline.
# import torch
# from transformers import AutoTokenizer, TextStreamer, pipeline
# transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en")
import whisper
import gradio as gr
import googletrans
from googletrans import Translator

# Load the Whisper "small" checkpoint once at startup.
model = whisper.load_model("small")

translator = Translator()

# googletrans.LANGUAGES maps ISO codes to language names, e.g. "en" -> "english".
lan = googletrans.LANGUAGES
keys = list(lan.keys())    # language codes ("en", "fr", ...)
vals = list(lan.values())  # language names ("english", "french", ...)
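
# Reverse lookup used inside transcribe() below, e.g.:
#   keys[vals.index("french")] -> "fr"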
def transcribe(lang, audio):
    """Transcribe recorded audio with Whisper, then translate the text
    into the destination language chosen in the dropdown."""
    # Load the audio and pad/trim it to Whisper's 30-second window.
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # Compute the log-Mel spectrogram and move it to the model's device.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Detect the spoken language (the probabilities are currently unused).
    _, probs = model.detect_language(mel)
    # print(f"Detected language: {max(probs, key=probs.get)}")

    # Decode the audio. fp16 must be set through DecodingOptions;
    # whisper.decode() itself takes no fp16 keyword argument.
    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    # Map the selected language name back to its googletrans code and translate.
    lang = lang.lower()
    return translator.translate(result.text, dest=keys[vals.index(lang)]).text
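
# A quick sanity check of transcribe() (hypothetical audio path):
#   transcribe("french", "sample.wav")  # -> French translation of the speech
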
def clear(msg):
    """Reset the transcript textbox."""
    return ""
with gr.Blocks() as demo:
    # Only used by the commented-out streaming variant below.
    state = gr.State(value="")
    # Note: "source" is the Gradio 3.x argument; Gradio 4 renamed it to sources=["microphone"].
    audio = gr.Audio(label="Press record to speak", source="microphone", type="filepath")
    dropdown = gr.Dropdown(label="First select the destination language", choices=vals)
    msg = gr.Textbox()
    clearBTN = gr.Button("Clear")

    # Transcription runs when a destination language is selected.
    dropdown.select(transcribe, [dropdown, audio], outputs=[msg])
    # Streaming alternative:
    # audio.stream(transcribe, [dropdown, audio, state], outputs=[msg, state])
    # audio.stop_recording(clear, [state], outputs=[msg, state])
    clearBTN.click(clear, [msg], outputs=[msg])

demo.launch(share=True)
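
# To run (a sketch; package versions are assumptions, not pinned by this file):
#   pip install -U openai-whisper gradio googletrans==4.0.0rc1
#   python app.py
# share=True additionally prints a temporary public *.gradio.live URL.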