Spaces:

freeja
/

lab2-whisper

Runtime error

App Files Files Community

lab2-whisper / app.py

freeja

Update app.py

bb7f34f over 1 year ago

raw

history blame

2.55 kB

	import gradio as gr
	from deep_translator import GoogleTranslator
	#import deepl
	from transformers import pipeline
	import os
	from gtts import gTTS
	from pytube import YouTube

	# Load the fine-tuned Whisper checkpoint once at import time; the
	# transcribe_* handlers below all reuse this single pipeline object.
	pipe = pipeline(model="freeja/lab2-whisper-sv")

	def transcribe_audio(audio, language):
	    """Transcribe a recorded audio clip with the module-level pipeline.

	    Args:
	        audio: Audio input handed to the pipeline (the Gradio ``Audio``
	            component feeding this function uses ``type="filepath"``).
	            ``None`` means nothing was recorded.
	        language: Target language selected in the UI. Accepted so the
	            signature matches the interface inputs, but currently unused
	            because the translation step is not wired in.

	    Returns:
	        The transcribed text, or an empty string when ``audio`` is ``None``.
	    """
	    if audio is None:
	        # Nothing was recorded; skip the model call instead of crashing.
	        return ""
	    return pipe(audio)["text"]


	def translate_audio(text, language):
	    """Translate Swedish text into the requested language.

	    Args:
	        text: Swedish source text.
	        language: Display name of the target language, e.g. ``"English"``.

	    Returns:
	        The translated text, or an empty string when ``text`` is empty
	        or ``None``.

	    Raises:
	        KeyError: If ``language`` is not one of the supported names.
	    """
	    # "Dutch" is listed because the UI dropdown offers it; "German" is kept
	    # so existing callers continue to work.
	    language_codes = {
	        "English": "en",
	        "Spanish": "es",
	        "German": "de",
	        "Dutch": "nl",
	        "French": "fr",
	        "Italian": "it",
	    }
	    # Resolve the language first so an unsupported name is always reported.
	    target = language_codes[language]
	    if not text:
	        # Nothing to translate; avoid a pointless network round trip.
	        return ""
	    return GoogleTranslator(source="sv", target=target).translate(text)

	def text_to_speech(text, language):
	    """Create a gTTS speech request for ``text`` in the given language.

	    Args:
	        text: Text to be spoken.
	        language: Display name of the language, e.g. ``"English"``.

	    Returns:
	        The ``gTTS`` object that was created, so the caller can write the
	        audio out. Callers that ignore the return value are unaffected.

	    Raises:
	        KeyError: If ``language`` is not one of the supported names.
	    """
	    language_codes = {
	        "English": "en",
	        "Spanish": "es",
	        "German": "de",
	        "Dutch": "nl",
	        "French": "fr",
	        "Italian": "it",
	    }
	    code = language_codes[language]
	    # The language must be passed by keyword: in current gTTS releases the
	    # second positional parameter is ``tld``, not ``lang``.
	    return gTTS(text, lang=code, slow=False)

	def transcribe_video(URL):
	    """Transcribe the audio track of a YouTube video.

	    Args:
	        URL: Address of the YouTube video.

	    Returns:
	        The transcribed text, or an empty string when ``URL`` is blank.

	    Raises:
	        ValueError: If no audio-only stream is available for the video.
	    """
	    import tempfile

	    if not URL or not URL.strip():
	        # Blank input: nothing to download or transcribe.
	        return ""
	    stream = YouTube(URL.strip()).streams.get_audio_only()
	    if stream is None:
	        raise ValueError("No audio-only stream is available for this video")
	    # Download into a throwaway directory so files do not pile up in the
	    # working directory and parallel requests cannot clash on a filename.
	    with tempfile.TemporaryDirectory() as workdir:
	        # download() returns the path of the saved file; the pipeline needs
	        # that path, not the Stream object itself.
	        audio_path = stream.download(output_path=workdir)
	        return pipe(audio_path)["text"]

	"""iface = gr.Interface(
	fn=transcribe_audio,
	inputs=[
	gr.Audio(source="microphone", type="filepath", label="Transcribe from Microphone"),
	gr.Dropdown(["English","Spanish","Dutch","French","Italian"], value="English", label="Translate to ")
	],
	outputs="text",
	title="Whisper Small Swedish",
	description="Realtime demo for Swedish speech recognition with translation using a fine-tuned Whisper small model")"""

	# Interface that takes a video URL as plain text and displays the text
	# returned by transcribe_video.
	video_transcription = gr.Interface(
	    fn=transcribe_video,
	    inputs="text",
	    outputs="text",
	    title="Whisper Small Swedish",
	    # A stray second closing quote used to follow this string and made the
	    # whole file fail to parse.
	    description="Realtime demo for Swedish speech recognition with translation using a fine-tuned Whisper small model",
	)

	# Interface that records from the microphone and displays the text returned
	# by transcribe_audio. The two inputs map, in order, to that function's
	# ``audio`` and ``language`` parameters.
	audio_transcription = gr.Interface(
	    fn=transcribe_audio,
	    inputs=[
	        gr.Audio(source="microphone", type="filepath", label="Transcribe from Microphone"),
	        gr.Dropdown(["English", "Spanish", "Dutch", "French", "Italian"], value="English", label="Translate to "),
	    ],
	    outputs="text",
	    title="Whisper Small Swedish",
	    # A stray second closing quote used to follow this string and made the
	    # whole file fail to parse.
	    description="Realtime demo for Swedish speech recognition with translation using a fine-tuned Whisper small model",
	)

	# Combine both interfaces into one tabbed app; the tab titles are listed in
	# the same order as the interfaces they label.
	iface = gr.TabbedInterface(
	    interface_list=[audio_transcription, video_transcription],
	    tab_names=["Transcribe Audio", "Transcribe Video"],
	)

	# Start serving the app.
	iface.launch()