Spaces:
Sleeping
Sleeping
import gradio as gr | |
from pytube import YouTube | |
import os | |
from pydub import AudioSegment | |
import ffmpeg | |
def convert_to_embed_url(youtube_url):
    """Build an HTML ``<iframe>`` snippet that embeds a YouTube video.

    Recognizes ``youtube.com/watch?...v=<id>`` links (the ``v`` parameter may
    appear anywhere in the query string) and ``youtu.be/<id>`` short links.

    Args:
        youtube_url: URL text to inspect; may be empty or ``None``.

    Returns:
        The iframe markup as a string, or ``""`` when no video id can be
        extracted from ``youtube_url``.
    """
    import re  # function-scope import keeps this block self-contained

    if not isinstance(youtube_url, str):
        return ""

    # The id is restricted to URL-safe characters, so it stops at '&', '?',
    # '#', quotes, or angle brackets. That keeps trailing parameters and
    # fragments out of the id and prevents arbitrary text from being
    # interpolated into the HTML below.
    found = re.search(
        r"(?:youtube\.com/watch\?(?:[^#&]+&)*?v=|youtu\.be/)([A-Za-z0-9_-]+)",
        youtube_url,
    )
    if found is None:
        return ""

    video_id = found.group(1)
    embed_url = f"https://www.youtube.com/embed/{video_id}"
    return f'<iframe width="100%" height="200" src="{embed_url}" style="border-radius:10px"></iframe>'
def download_audio_from_youtube(video_url):
    """Download the audio of a YouTube video and convert it to MP3.

    The first audio-only stream returned by pytube is downloaded into the
    current working directory and re-encoded to MP3 with pydub. The
    intermediate download is then deleted.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        Path of the generated ``.mp3`` file.

    Raises:
        gr.Error: If the video has no audio-only stream, or if downloading or
            converting fails. The original exception is chained as the cause.
    """
    downloaded_file = None
    mp3_file = None
    try:
        yt = YouTube(video_url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        if audio_stream is None:
            # Give a readable message instead of an AttributeError on None.
            raise ValueError("no audio-only stream is available for this video")
        downloaded_file = audio_stream.download(".")
        base, _ = os.path.splitext(downloaded_file)
        mp3_file = base + '.mp3'
        AudioSegment.from_file(downloaded_file).export(mp3_file, format='mp3')
        return mp3_file
    except Exception as e:
        # gr.Error is an exception type: it only reaches the UI when raised.
        raise gr.Error(f"An error occurred: {e}") from e
    finally:
        # Remove the intermediate download on success and on failure, but
        # never delete it when it is itself the MP3 being returned.
        if (
            downloaded_file
            and downloaded_file != mp3_file
            and os.path.exists(downloaded_file)
        ):
            os.remove(downloaded_file)
def convert_video_to_audio(input_file):
    """Convert a video file to ``audio.mp3`` using ffmpeg.

    Args:
        input_file: Path of the video file passed to ``ffmpeg.input``.

    Returns:
        The output path, always ``"audio.mp3"`` (relative to the current
        working directory).

    Raises:
        gr.Error: If ffmpeg reports a failure. The ``ffmpeg.Error`` is chained
            as the cause.
    """
    # NOTE(review): the output name is fixed, so concurrent conversions write
    # to the same file — confirm whether that matters for this app.
    output_file = "audio.mp3"
    try:
        (
            ffmpeg
            .input(input_file)
            .output(output_file)
            # Without overwrite_output ffmpeg refuses to replace an existing
            # audio.mp3, so every conversion after the first one would fail.
            .run(overwrite_output=True)
        )
    except ffmpeg.Error as e:
        # gr.Error is an exception type: it only reaches the UI when raised.
        raise gr.Error(f"An error occurred: {e}") from e
    return output_file
def transcribe_button(source):
    """Create the "Transcribe" button.

    Args:
        source: Accepted for the caller's signature; not used here.

    Returns:
        A ``gr.Button`` labelled ``"Transcribe 📜"``.
    """
    button = gr.Button("Transcribe 📜")
    return button
def generate_audio(source, source_file):
    """Resolve the selected input to an audio file and expose it for download.

    Args:
        source: ``"audio"`` uses ``source_file`` as-is, ``"video"`` converts
            it with ``convert_video_to_audio``, and any other value treats
            ``source_file`` as a YouTube URL for
            ``download_audio_from_youtube``.
        source_file: File path, or a URL for the YouTube case.

    Returns:
        A ``gr.DownloadButton`` pointing at the audio file, or a disabled one
        when no audio file could be obtained.
    """
    if source == "audio":
        audio_file = source_file
    elif source == "video":
        gr.Info("Converting video to audio...")
        audio_file = convert_video_to_audio(source_file)
    else:
        gr.Info("Downloading audio from YouTube...")
        audio_file = download_audio_from_youtube(source_file)

    if not audio_file:
        # Nothing to download (no upload, or the helper produced no file):
        # keep the button disabled instead of enabling it with no target.
        gr.Warning("No audio file is available to download.")
        return gr.DownloadButton(value=None, interactive=False)
    return gr.DownloadButton(value=audio_file, interactive=True)
def generate_prompt(cleanup):
    """Assemble the transcript clean-up prompt from the selected options.

    Args:
        cleanup: Collection of selected option labels; the labels checked are
            ``"Remove typos"`` and ``"Separate into paragraphs"``. May be
            empty or ``None``.

    Returns:
        A ``(gr.Textbox, gr.Accordion)`` pair. With no options selected the
        textbox is hidden and the accordion closed; otherwise the textbox is
        visible with the prompt as its value and the accordion is open.
    """
    # Guard clause: no options selected means there is no prompt to show.
    if not cleanup:
        return gr.Textbox(visible=False), gr.Accordion(open=False)

    pieces = [
        "The following is a raw transcript from an automatic transcription system. "
    ]
    if "Remove typos" in cleanup:
        pieces.append(
            "Fix the minor typos (e.g. misspellings, homophones) in the transcript so that the transcript reads more logically. "
        )
    if "Separate into paragraphs" in cleanup:
        pieces.append(
            "Separate the transcript into paragraphs to make it more readable. "
        )
    pieces.append(
        "Don't add any extra words in your response, like 'Here is the corrected transcript:' just return the final transcript."
    )

    full_prompt = "".join(pieces)
    return gr.Textbox(visible=True, value=full_prompt), gr.Accordion(open=True)