Spaces:

meisin123
/

iban_speech_tool

Paused

App Files Files Community

iban_speech_tool / app.py

meisin123

Update app.py

36499d2 verified 6 months ago

raw

history blame contribute delete

4.5 kB

	import gradio as gr
	import os as os
	from transformers import pipeline
	import moviepy.editor as mp
	from pytube import YouTube
	import torch

	# Custom stylesheet passed to gr.Blocks(css=...) when the page layout is built.
	# The encoding is explicit so the result does not depend on the host locale.
	with open('styles.css', 'r', encoding='utf-8') as f:
	    css = f.read()

	# Use the first CUDA device when torch reports one, otherwise fall back to CPU.
	device = "cuda:0" if torch.cuda.is_available() else "cpu"

	# Speech-recognition pipeline used by transcribe(); chunk_length_s=30 asks the
	# pipeline to work through the audio in 30-second chunks.
	pipe = pipeline(
	    "automatic-speech-recognition",
	    model="meisin123/whisper-small-iban",
	    chunk_length_s=30,
	    device=device,
	)

	def transcribe(audio_file):
	    """Transcribe an audio file with the module-level ``pipe`` pipeline.

	    Args:
	        audio_file: Audio input handed to the pipeline; the callers in this
	            file pass a file path.

	    Returns:
	        The value stored under the ``"text"`` key of the pipeline result.

	    Raises:
	        gr.Error: If ``audio_file`` is ``None`` (nothing was uploaded or
	            recorded), so the UI shows a readable message instead of an
	            internal pipeline failure.
	    """
	    if audio_file is None:
	        raise gr.Error("Please provide an audio file or recording to transcribe.")

	    result = pipe(audio_file, batch_size=16)
	    return result["text"]

	def get_youtube_audio(link):
	    """Download the audio of a YouTube video, transcribe it and build a player.

	    Args:
	        link: URL of the YouTube video.

	    Returns:
	        A ``(html, text)`` tuple: an ``<iframe>`` snippet embedding the video
	        and the transcription returned by ``transcribe``.

	    Raises:
	        gr.Error: If ``link`` is empty or the video exposes no audio-only
	            stream.
	    """
	    if not link or not link.strip():
	        raise gr.Error("Please enter a YouTube link.")

	    link_object = YouTube(link.strip())

	    # Use the id pytube parsed from the URL instead of slicing the raw link.
	    # Slicing breaks on extra query parameters (e.g. "&t=10s") and on
	    # youtu.be short links, and it lets arbitrary user text reach both the
	    # file path and the HTML below.
	    video_id = link_object.video_id

	    stream = link_object.streams.filter(only_audio=True).first()
	    if stream is None:
	        raise gr.Error("No audio stream is available for this video.")

	    # The download target lives in "video/"; make sure the folder exists.
	    os.makedirs("video", exist_ok=True)
	    extracted_audio = "video/" + video_id + ".mp3"
	    if os.path.isfile(extracted_audio):
	        os.remove(extracted_audio)

	    stream.download(filename=extracted_audio)

	    text = transcribe(extracted_audio)

	    # Build the embed URL from the parsed id so every accepted link form
	    # yields a working player.
	    newlink = f"https://www.youtube.com/embed/{video_id}"
	    html = f'<iframe width="560" height="315" src="{newlink}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'

	    return html, text


	def extract_audio(video):
	    """Extract the audio track of a video file to WAV and transcribe it.

	    Args:
	        video: Path of the video file.

	    Returns:
	        The transcription returned by ``transcribe`` for the extracted audio.

	    Raises:
	        gr.Error: If no video was supplied or the video has no audio track.
	    """
	    if video is None:
	        raise gr.Error("Please provide a video file to transcribe.")

	    # Derive the output name from the file's base name without its extension.
	    # splitext copes with names that contain no dot or several dots, where
	    # splitting on "." would fail or pick the wrong part.
	    stem = os.path.splitext(os.path.basename(video))[0]
	    os.makedirs("video", exist_ok=True)
	    extracted_audio = "video/" + stem + ".wav"

	    my_clip = mp.VideoFileClip(video)
	    try:
	        if my_clip.audio is None:
	            raise gr.Error("The selected video has no audio track.")

	        if os.path.isfile(extracted_audio):
	            os.remove(extracted_audio)
	        my_clip.audio.write_audiofile(extracted_audio)
	    finally:
	        # Always close the clip so its reader resources are released, even
	        # when extraction fails.
	        my_clip.close()

	    return transcribe(extracted_audio)


	# Row count used by the standalone `trans` textbox defined at the end of this section.
	phl = 10

	# Example inputs offered by the file-based interfaces.
	_audio_examples = [
	    "example_data/ibf_003_014.wav",
	    "example_data/ibf_005_115.wav",
	    "example_data/ibf_008_008.wav",
	]
	_video_examples = ["example_data/iban_news.mp4"]


	def _transcription_box():
	    """Return a fresh ten-line textbox labelled "Transcription"."""
	    return gr.Textbox(label="Transcription", lines=10)


	# Uploaded audio file -> transcribe().
	_upload_input = gr.Audio(sources="upload", type="filepath", editable=True)
	file_transcribe = gr.Interface(
	    fn=transcribe,
	    inputs=_upload_input,
	    outputs=_transcription_box(),
	    examples=_audio_examples,
	)

	# Microphone recording -> transcribe().
	_mic_input = gr.Microphone(type="filepath")
	mic_transcribe = gr.Interface(
	    fn=transcribe,
	    inputs=_mic_input,
	    outputs=_transcription_box(),
	)

	# Uploaded video file -> extract_audio().
	_video_input = gr.Video(label="Video file", interactive=True)
	video_transcribe = gr.Interface(
	    fn=extract_audio,
	    inputs=_video_input,
	    outputs=_transcription_box(),
	    examples=_video_examples,
	)

	# YouTube link -> get_youtube_audio(), which returns (player HTML, transcript).
	_link_input = gr.Textbox(value="https://www.youtube.com/watch?v=TzUT1dAY5MM")
	_player_output = gr.HTML("<br>")
	youtube_transcribe = gr.Interface(
	    fn=get_youtube_audio,
	    inputs=_link_input,
	    outputs=[_player_output, _transcription_box()],
	)

	# NOTE(review): the heading and `trans` textbox below are created outside any
	# layout context and are not referenced elsewhere in this file -- confirm
	# whether they are still needed.
	gr.HTML("<h3>Transcription output:</h3>")
	trans = gr.Textbox(
	    placeholder="Your formatted transcript will appear here ...",
	    lines=phl,
	    max_lines=25,
	    show_label=False,
	)

	# Page layout: intro text, the audio tabs, the video tabs, and a placeholder
	# note for the translation feature.
	with gr.Blocks(css=css) as demo:

	    # Intro section. The image uses `class` (plain HTML) rather than the
	    # JSX-only `className`, which a browser ignores in raw markup.
	    gr.HTML(
	        "<h1>Bahasa Iban Transcriber</h1>"
	        "<table>"
	        "<tr>"
	        "<td><img src='https://scontent.fkul16-4.fna.fbcdn.net/v/t1.6435-9/107309167_3328754970510517_5906944780635912086_n.jpg?_nc_cat=107&ccb=1-7&_nc_sid=5f2048&_nc_ohc=TH6FiWE7PjkAX_7ygt3&_nc_ht=scontent.fkul16-4.fna&oh=00_AfCDxXKWu4V_LBws5kV0pxjfuNIa9PJEi_IRiy51IeoJIg&oe=662B027B' class='w-11 h-11 rounded-full'></td>"
	        "<td>The Iban language is spoken by the Iban, one of the Dayak ethnic groups, who live in Brunei, the Indonesian province of West Kalimantan and in the Malaysian state of Sarawak. It belongs to the Malayic subgroup, a Malayo-Polynesian branch of the Austronesian language family.</td>"
	        "</tr>"
	        "</table>"
	        "<br>"
	        "<h3 class='title'>Helping you understand Bahasa Iban</h3>"
	        "<br>"
	        "<p>This AI enabled tool allows you to </p>"
	        "<ul>"
	        "<li>1) Transcribe Iban (from audio OR video sources) to text.</li>"
	        "<li>2) Translate transcribed Iban to English (coming soon)</li>"
	        "</ul>"
	        "<h2>Transcribe:</h2>"
	        "<h3> Audio Source</h3>"
	    )

	    # Audio inputs: uploaded file or microphone recording.
	    gr.TabbedInterface(
	        [file_transcribe, mic_transcribe],
	        ["Audio File", "Record from Microphone"],
	    )

	    # Video inputs: uploaded file or YouTube link.
	    gr.HTML("<h3> a) Video Source</h3>")
	    gr.TabbedInterface(
	        [video_transcribe, youtube_transcribe],
	        ["Video File", "From Youtube"],
	    )

	    # Placeholder for the translation feature; the paragraph is now closed.
	    gr.HTML(
	        "<br><h2>Translation to English:</h2>"
	        "<p> Translation functionality is not available yet!</p>"
	    )

	demo.launch()