abidlabs's picture
abidlabs HF staff
changes
fe8d4db
raw
history blame contribute delete
No virus
2.61 kB
import gradio as gr
from pytube import YouTube
import os
from pydub import AudioSegment
import ffmpeg
def convert_to_embed_url(youtube_url):
    """Build an HTML ``<iframe>`` snippet that embeds a YouTube video.

    Recognizes ``youtube.com/watch?...`` URLs (the ``v`` parameter may appear
    anywhere in the query string) and ``youtu.be/<id>`` short links.

    Args:
        youtube_url: URL entered by the user; may be empty or ``None``.

    Returns:
        The iframe markup as a string, or ``""`` when no video id can be
        extracted from ``youtube_url``.
    """
    # Imported locally so this function does not rely on file-level imports.
    from urllib.parse import parse_qs, quote

    if not youtube_url:
        return ""

    # A fragment ("#t=30") is never part of the video id.
    url = youtube_url.strip().split("#", 1)[0]

    if "youtube.com/watch?" in url:
        # Parse the query properly so "?feature=share&v=<id>" works too.
        query = url.split("?", 1)[1]
        video_id = parse_qs(query).get("v", [""])[0]
    elif "youtu.be/" in url:
        video_id = url.split("youtu.be/", 1)[1]
        for separator in ("?", "&", "/"):
            video_id = video_id.split(separator, 1)[0]
    else:
        return ""

    if not video_id:
        return ""

    # The id comes from user input and ends up inside a quoted HTML
    # attribute; percent-encode it so it cannot break out of ``src``.
    embed_url = f"https://www.youtube.com/embed/{quote(video_id, safe='')}"
    return f'<iframe width="100%" height="200" src="{embed_url}" style="border-radius:10px"></iframe>'
def download_audio_from_youtube(video_url):
    """Download the audio track of a YouTube video and convert it to MP3.

    The first audio-only stream reported by pytube is downloaded into the
    current directory (``"."``), re-encoded to MP3 with pydub, and the
    intermediate download is removed.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        Path of the resulting ``.mp3`` file.

    Raises:
        gr.Error: If the download or the conversion fails, so that Gradio
            can surface the message to the user.
    """
    try:
        yt = YouTube(video_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            # Without this check the failure would show up as an opaque
            # "'NoneType' object has no attribute 'download'" message.
            raise ValueError("no audio-only stream is available for this video")
        downloaded_file = audio_stream.download(".")
        base, _ = os.path.splitext(downloaded_file)
        mp3_file = base + '.mp3'
        # If the download already has the target name, converting onto
        # itself and then deleting the source would delete the result.
        if downloaded_file != mp3_file:
            try:
                AudioSegment.from_file(downloaded_file).export(mp3_file, format='mp3')
            finally:
                # Remove the intermediate file even when conversion fails.
                os.remove(downloaded_file)
        return mp3_file
    except Exception as e:
        # gr.Error is an exception: it only reaches the UI when raised.
        raise gr.Error(f"An error occurred: {e}") from e
def convert_video_to_audio(input_file):
    """Extract the audio of a video file into ``audio.mp3`` using ffmpeg.

    Args:
        input_file: Path of the video file to convert.

    Returns:
        The output path, always ``"audio.mp3"`` (relative to the working
        directory).

    Raises:
        gr.Error: If ffmpeg reports a failure, so that Gradio can surface
            the message to the user.
    """
    # NOTE(review): the output name is fixed, so concurrent conversions
    # would write to the same file - confirm whether that matters here.
    output_file = "audio.mp3"
    try:
        (
            ffmpeg
            .input(input_file)
            .output(output_file)
            # Without overwrite_output ffmpeg refuses to replace an existing
            # audio.mp3, so every conversion after the first would fail.
            .run(overwrite_output=True)
        )
        return output_file
    except ffmpeg.Error as e:
        # gr.Error is an exception: it only reaches the UI when raised.
        raise gr.Error(f"An error occurred: {e}") from e
def transcribe_button(source):
    """Return a Gradio button whose label matches the selected source type.

    Args:
        source: Source kind selected in the UI; ``"audio"`` yields the audio
            label, any other value yields the video label.

    Returns:
        A ``gr.Button`` carrying the appropriate "Transcribe ..." label.
    """
    # The labels previously contained mojibake ("πŸ“œ"): the UTF-8 bytes of
    # the scroll emoji (U+1F4DC) decoded with the wrong codec.
    label = "Transcribe audio 📜" if source == "audio" else "Transcribe video 📜"
    return gr.Button(label)
def generate_audio(source, source_file):
    """Produce a download button pointing at the audio for the given source.

    Args:
        source: ``"audio"`` to use ``source_file`` as-is, ``"video"`` to
            convert it with ``convert_video_to_audio``; any other value is
            handled by ``download_audio_from_youtube``.
        source_file: The value forwarded to the chosen handler (used directly
            as the audio file when ``source`` is ``"audio"``).

    Returns:
        An interactive ``gr.DownloadButton`` whose value is the audio file.
    """
    # Audio needs no processing: hand it straight to the download button.
    if source == "audio":
        return gr.DownloadButton(value=source_file, interactive=True)

    if source == "video":
        gr.Info("Converting video to audio...")
        result_path = convert_video_to_audio(source_file)
    else:
        gr.Info("Downloading audio from YouTube...")
        result_path = download_audio_from_youtube(source_file)

    return gr.DownloadButton(value=result_path, interactive=True)
def generate_prompt(cleanup):
    """Assemble the transcript clean-up prompt for the selected options.

    Args:
        cleanup: Collection of selected clean-up option labels; when it is
            empty or ``None`` no prompt is shown.

    Returns:
        A hidden ``gr.Textbox`` when nothing is selected, otherwise a visible
        ``gr.Textbox`` whose value is the assembled prompt.
    """
    # Nothing selected: hide the prompt box entirely.
    if not cleanup:
        return gr.Textbox(visible=False)

    pieces = ["The following is a raw transcript from an automatic transcription system. "]
    if "Remove typos" in cleanup:
        pieces.append("Fix the minor typos (e.g. misspellings, homophones) in the transcript so that the transcript reads more logically. ")
    if "Separate into paragraphs" in cleanup:
        pieces.append("Separate the transcript into paragraphs to make it more readable. ")
    # Always close with the instruction that suppresses chatty preambles.
    pieces.append("Don't add any extra words in your response, like 'Here is the corrected transcript:' just return the final transcript.")

    return gr.Textbox(visible=True, value="".join(pieces))