# meisin123's picture
# Update app.py
# 36499d2 verified
import gradio as gr
import os as os
from transformers import pipeline
import moviepy.editor as mp
from pytube import YouTube
import torch
# Custom stylesheet handed to gr.Blocks(css=...) further down.
# The encoding is explicit so the read does not depend on the platform's
# locale default.
with open('styles.css', 'r', encoding='utf-8') as f:
    css = f.read()

# Run on the first CUDA device when torch reports one, otherwise on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Speech-recognition pipeline used by transcribe(); built once at import time.
pipe = pipeline(
    "automatic-speech-recognition",
    model="meisin123/whisper-small-iban",
    chunk_length_s=30,
    device=device,
)
def transcribe(audio_file):
    """Transcribe an audio file with the module-level ASR pipeline.

    Args:
        audio_file: Path of the audio file to transcribe. A falsy value
            (``None`` or ``""``) is treated as "nothing to transcribe".

    Returns:
        The ``"text"`` entry of the pipeline result, or ``""`` when no audio
        file was supplied.
    """
    # The handler may be invoked before any audio has been provided; return
    # an empty transcript instead of passing None to the pipeline.
    if not audio_file:
        return ""
    result = pipe(audio_file, batch_size=16)
    return result["text"]
def get_youtube_audio(link):
    """Download a YouTube video's audio, transcribe it and build an embed.

    Args:
        link: URL of the YouTube video.

    Returns:
        A ``(html, text)`` tuple: an ``<iframe>`` snippet embedding the video
        and the transcription of its audio. Both are ``""`` when ``link`` is
        empty or blank.

    Raises:
        gr.Error: If pytube finds no audio-only stream for the video.
    """
    if not link or not link.strip():
        return "", ""

    link_object = YouTube(link.strip())
    # first() yields None on an empty result, which allows a readable error
    # instead of an IndexError from indexing an empty stream list.
    stream = link_object.streams.filter(only_audio=True).first()
    if stream is None:
        raise gr.Error("No audio stream is available for this video.")

    # Use the id pytube parsed from the URL rather than slicing the raw text:
    # it also covers short links and URLs carrying extra query parameters,
    # and keeps user-typed text out of the file name and the HTML below.
    video_id = link_object.video_id

    os.makedirs("video", exist_ok=True)
    extracted_audio = "video/" + video_id + ".mp3"
    # Drop any earlier download so the file is always fetched afresh.
    if os.path.isfile(extracted_audio):
        os.remove(extracted_audio)
    stream.download(filename=extracted_audio)
    text = transcribe(extracted_audio)

    embed_url = "https://www.youtube.com/embed/" + video_id
    html = f'<iframe width="560" height="315" src="{embed_url}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
    return html, text
def extract_audio(video):
    """Extract the audio track of a video file and transcribe it.

    The audio is written to ``video/<stem>.wav``, where ``<stem>`` is the
    video's file name without its extension.

    Args:
        video: Path of the video file. A falsy value (``None`` or ``""``) is
            treated as "nothing to transcribe".

    Returns:
        The transcription of the extracted audio, or ``""`` when no video
        was supplied.

    Raises:
        gr.Error: If the video has no audio track.
    """
    if not video:
        return ""

    # splitext/basename cope with dotted file names and with paths that
    # contain no dot at all, unlike splitting the whole path on ".".
    stem = os.path.splitext(os.path.basename(video))[0]
    os.makedirs("video", exist_ok=True)
    extracted_audio = "video/" + stem + ".wav"
    if os.path.isfile(extracted_audio):
        os.remove(extracted_audio)

    my_clip = mp.VideoFileClip(video)
    try:
        if my_clip.audio is None:
            raise gr.Error("The uploaded video has no audio track.")
        my_clip.audio.write_audiofile(extracted_audio)
    finally:
        # Always release the clip's reader resources, even on failure.
        my_clip.close()

    return transcribe(extracted_audio)
# Number of visible lines for the standalone transcript box defined below.
phl = 10


def _transcription_box():
    """Return a new 10-line output textbox labelled "Transcription"."""
    return gr.Textbox(label="Transcription", lines=10)


# Transcribe an uploaded audio file.
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="upload", type="filepath", editable=True),
    outputs=_transcription_box(),
    examples=[
        "example_data/ibf_003_014.wav",
        "example_data/ibf_005_115.wav",
        "example_data/ibf_008_008.wav",
    ],
)

# Transcribe a microphone recording.
mic_transcribe = gr.Interface(
    fn=transcribe,
    inputs=gr.Microphone(type="filepath"),
    outputs=_transcription_box(),
)

# Transcribe the audio track of an uploaded video file.
video_transcribe = gr.Interface(
    fn=extract_audio,
    inputs=gr.Video(label="Video file", interactive=True),
    outputs=_transcription_box(),
    examples=["example_data/iban_news.mp4"],
)

# Transcribe a YouTube video; outputs are the embed HTML and the transcript.
youtube_transcribe = gr.Interface(
    fn=get_youtube_audio,
    inputs=gr.Textbox(value="https://www.youtube.com/watch?v=TzUT1dAY5MM"),
    outputs=[gr.HTML("<br>"), _transcription_box()],
)

# NOTE(review): the heading below is created at module level, before the
# gr.Blocks context is opened, and its result is not bound to a name; `trans`
# is not referenced again in the visible code. Confirm whether both are still
# needed.
gr.HTML("<h3>Transcription output:</h3>")
trans = gr.Textbox(
    placeholder="Your formatted transcript will appear here ...",
    lines=phl,
    max_lines=25,
    show_label=False,
)
# Page layout: an HTML introduction, the two audio tabs, the two video tabs
# and a placeholder notice for the translation feature.
with gr.Blocks(css=css) as demo:
    # Load descriptions
    # The <img> uses the HTML `class` attribute (the JSX spelling `className`
    # is not an HTML attribute, so the classes were never applied).
    gr.HTML(
        "<h1>Bahasa Iban Transcriber</h1>"
        "<table>"
        "<tr>"
        "<td><img src='https://scontent.fkul16-4.fna.fbcdn.net/v/t1.6435-9/107309167_3328754970510517_5906944780635912086_n.jpg?_nc_cat=107&ccb=1-7&_nc_sid=5f2048&_nc_ohc=TH6FiWE7PjkAX_7ygt3&_nc_ht=scontent.fkul16-4.fna&oh=00_AfCDxXKWu4V_LBws5kV0pxjfuNIa9PJEi_IRiy51IeoJIg&oe=662B027B' class='w-11 h-11 rounded-full'></td>"
        "<td>The Iban language is spoken by the Iban, one of the Dayak ethnic groups, who live in Brunei, the Indonesian province of West Kalimantan and in the Malaysian state of Sarawak. It belongs to the Malayic subgroup, a Malayo-Polynesian branch of the Austronesian language family.</td>"
        "</tr>"
        "</table>"
        "<br>"
        "<h3 class='title'>Helping you understand Bahasa Iban</h3>"
        "<br>"
        "<p>This AI enabled tool allows you to </p>"
        "<ul>"
        "<li>1) Transcribe Iban (from audio OR video sources) to text.</li>"
        "<li>2) Translate transcribed Iban to English (coming soon)</li>"
        "</ul>"
        "<h2>Transcribe:</h2>"
        "<h3> Audio Source</h3>"
    )
    gr.TabbedInterface(
        [file_transcribe, mic_transcribe],
        ["Audio File", "Record from Microphone"],
    )

    gr.HTML("<h3> a) Video Source</h3>")
    gr.TabbedInterface(
        [video_transcribe, youtube_transcribe],
        ["Video File", "From Youtube"],
    )

    # The paragraph is now explicitly closed.
    gr.HTML(
        "<br><h2>Translation to English:</h2>"
        "<p> Translation functionality is not available yet!</p>"
    )

demo.launch()