Spaces:
Paused
Paused
import gradio as gr | |
import os as os | |
from transformers import pipeline | |
import moviepy.editor as mp | |
from pytube import YouTube | |
import torch | |
# Custom stylesheet handed to gr.Blocks further down. The encoding is pinned
# so the CSS decodes the same way regardless of the host's locale settings.
with open('styles.css', 'r', encoding='utf-8') as f:
    css = f.read()

# Run inference on the first CUDA device when torch reports one, else on CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Speech-recognition pipeline shared by every transcription entry point;
# audio is processed in 30-second chunks.
pipe = pipeline(
    "automatic-speech-recognition",
    model="meisin123/whisper-small-iban",
    chunk_length_s=30,
    device=device,
)
def transcribe(audio_file):
    """Transcribe an audio file with the module-level ASR pipeline.

    Args:
        audio_file: Path of the audio file to transcribe. ``None`` or an
            empty string is accepted and means "no audio supplied" (e.g. the
            form was submitted without uploading or recording anything).

    Returns:
        The recognised text, or an empty string when no audio was supplied.
    """
    # Skip the pipeline call entirely when there is nothing to transcribe.
    if audio_file is None or (isinstance(audio_file, str) and not audio_file):
        return ""

    result = pipe(audio_file, batch_size=16)
    return result["text"]
def get_youtube_audio(link):
    """Download a YouTube video's audio, transcribe it and build an embed player.

    Args:
        link: URL of the YouTube video.

    Returns:
        A ``(html, text)`` tuple: an ``<iframe>`` snippet embedding the video,
        and the transcription of the downloaded audio.
    """
    link_object = YouTube(link)

    # Use the video ID that pytube parsed from the URL instead of splitting
    # the raw link on "=". Links carrying extra query parameters (such as
    # "&t=30s") or written in short form would otherwise produce a wrong file
    # name and a broken embed URL, and raw user input would be interpolated
    # into both a file path and the returned HTML.
    video_id = link_object.video_id

    # First audio-only stream offered for the video.
    stream = link_object.streams.filter(only_audio=True)[0]

    # Make sure the output directory exists before downloading into it.
    os.makedirs("video", exist_ok=True)
    extracted_audio = "video/" + video_id + ".mp3"
    # Drop any stale download of the same video so it is fetched afresh.
    if os.path.isfile(extracted_audio):
        os.remove(extracted_audio)
    stream.download(filename=extracted_audio)

    text = transcribe(extracted_audio)

    embed_url = f"https://www.youtube.com/embed/{video_id}"
    html = f'<iframe width="560" height="315" src="{embed_url}" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
    return html, text
def extract_audio(video):
    """Extract the audio track of a video file and transcribe it.

    The audio is written to ``video/<name>.wav``, where ``<name>`` is the
    video's file name without its extension, and then passed to
    ``transcribe``.

    Args:
        video: Path of the video file. ``None`` or an empty string means no
            video was supplied.

    Returns:
        The transcription text, or an empty string when no video was supplied.

    Raises:
        ValueError: If the video has no audio track.
    """
    if not video:
        return ""

    # Derive the output name from the file's base name. Splitting the whole
    # path on "." breaks on paths without an extension or with dots in a
    # directory or file name.
    stem = os.path.splitext(os.path.basename(video))[0]
    extracted_audio = "video/" + stem + ".wav"

    # Make sure the output directory exists before writing into it.
    os.makedirs("video", exist_ok=True)
    # Drop any stale extraction of the same name so it is regenerated.
    if os.path.isfile(extracted_audio):
        os.remove(extracted_audio)

    my_clip = mp.VideoFileClip(video)
    try:
        if my_clip.audio is None:
            raise ValueError("The video has no audio track to transcribe.")
        my_clip.audio.write_audiofile(extracted_audio)
    finally:
        # Always release the clip's underlying resources, even on failure.
        my_clip.close()

    return transcribe(extracted_audio)
# Initial number of visible lines for the stand-alone transcript box below.
phl = 10


def _transcription_box():
    """Return a new 10-line output textbox labelled "Transcription"."""
    return gr.Textbox(label="Transcription", lines=10)


# --- Audio sources ----------------------------------------------------------

# Transcribe an uploaded audio file; a few sample recordings are offered.
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="upload", type="filepath", editable=True),
    outputs=_transcription_box(),
    examples=[
        "example_data/ibf_003_014.wav",
        "example_data/ibf_005_115.wav",
        "example_data/ibf_008_008.wav",
    ],
)

# Transcribe a recording made with the microphone.
mic_transcribe = gr.Interface(
    fn=transcribe,
    inputs=gr.Microphone(type="filepath"),
    outputs=_transcription_box(),
)

# --- Video sources ----------------------------------------------------------

# Transcribe the audio track of an uploaded video file.
video_transcribe = gr.Interface(
    fn=extract_audio,
    inputs=gr.Video(label="Video file", interactive=True),
    outputs=_transcription_box(),
    examples=["example_data/iban_news.mp4"],
)

# Transcribe a YouTube video; the outputs are the embedded player followed by
# the transcript, matching the (html, text) pair get_youtube_audio returns.
youtube_transcribe = gr.Interface(
    fn=get_youtube_audio,
    inputs=gr.Textbox(value="https://www.youtube.com/watch?v=TzUT1dAY5MM"),
    outputs=[gr.HTML("<br>"), _transcription_box()],
)

# NOTE(review): the two components below are created outside any layout
# context and `trans` is not referenced anywhere in the visible code; they
# look like leftovers -- confirm before removing.
gr.HTML("<h3>Transcription output:</h3>")
trans = gr.Textbox(
    placeholder="Your formatted transcript will appear here ...",
    lines=phl,
    max_lines=25,
    show_label=False,
)
# Page layout: an introductory HTML header, then one tab group for the audio
# sources and one for the video sources, then a placeholder for translation.
with gr.Blocks(css=css) as demo:
    # Load descriptions
    # NOTE(review): the image is hot-linked from a third-party CDN URL that
    # carries signed query parameters; presumably it can expire -- consider
    # bundling the image with the app.
    # The <img> uses the HTML `class` attribute; the React-only `className`
    # spelling is ignored by browsers in plain HTML.
    gr.HTML("<h1>Bahasa Iban Transcriber</h1>"
            "<table>"
            "<tr>"
            "<td><img src='https://scontent.fkul16-4.fna.fbcdn.net/v/t1.6435-9/107309167_3328754970510517_5906944780635912086_n.jpg?_nc_cat=107&ccb=1-7&_nc_sid=5f2048&_nc_ohc=TH6FiWE7PjkAX_7ygt3&_nc_ht=scontent.fkul16-4.fna&oh=00_AfCDxXKWu4V_LBws5kV0pxjfuNIa9PJEi_IRiy51IeoJIg&oe=662B027B' class='w-11 h-11 rounded-full'></td>"
            "<td>The Iban language is spoken by the Iban, one of the Dayak ethnic groups, who live in Brunei, the Indonesian province of West Kalimantan and in the Malaysian state of Sarawak. It belongs to the Malayic subgroup, a Malayo-Polynesian branch of the Austronesian language family.</td>"
            "</tr>"
            "</table>"
            "<br>"
            "<h3 class='title'>Helping you understand Bahasa Iban</h3>"
            "<br>"
            "<p>This AI enabled tool allows you to </p>"
            "<ul>"
            "<li>1) Transcribe Iban (from audio OR video sources) to text.</li>"
            "<li>2) Translate transcribed Iban to English (coming soon)</li>"
            "</ul>"
            "<h2>Transcribe:</h2>"
            "<h3> Audio Source</h3>")
    # Audio inputs: uploaded file or microphone recording.
    gr.TabbedInterface(
        [file_transcribe, mic_transcribe],
        ["Audio File", "Record from Microphone"],
    )
    gr.HTML("<h3> a) Video Source</h3>")
    # Video inputs: uploaded file or YouTube link.
    gr.TabbedInterface(
        [video_transcribe, youtube_transcribe],
        ["Video File", "From Youtube"],
    )
    # The closing </p> keeps the markup well-formed.
    gr.HTML("<br><h2>Translation to English:</h2>"
            "<p> Translation functionality is not available yet!</p>")

# Start the web app.
demo.launch()