Spaces:

FriendlyUser
/

YoutubeDownloaderSubber

Runtime error

File size: 5,550 Bytes

d903faf
 
 
d255f96
d903faf
4de615e
8377508
d903faf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4de615e
 
 
 
 
 
 
d903faf
 
d255f96
d903faf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d1a0091
 
 
d903faf
d1a0091
 
 
 
 
 
 
 
d1f9d6f
d903faf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2079594
 
 
 
 
 
 
 
 
 
d903faf
 
 
 
4865ee0
d903faf
 
4de615e
d903faf
4de615e
d903faf
 
 
 
 
 
 
 
 
 
 
 
 
 
8377508
 
 
4de615e
d903faf
01d4c13
 
 
b273369
01d4c13
fe8b1e3
 
 
b273369
01d4c13
b273369
 
ebe7599
f5d0648
01d4c13
d903faf
 
 
 
 
4865ee0
d903faf
 
4865ee0
 
d903faf
4865ee0
 
d903faf
3d8816a
d903faf

import whisper
import gradio as gr
import ffmpeg
from yt_dlp import YoutubeDL
import os
import sys
from subprocess import PIPE, run

# Numeric format ids that this module treats as livestream variants.
# NOTE(review): presumably YouTube itags for HLS live streams - confirm.
youtube_livestream_codes = [91, 92, 93, 94, 95, 96, 300, 301]

# Numeric format ids that this module treats as regular (non-live) mp4 variants.
# NOTE(review): presumably YouTube itags - confirm which of these are
# video-only / audio-only before relying on the order.
youtube_mp4_codes = [298, 18, 22, 140, 133, 134]

def second_to_timecode(x: float) -> str:
    """Format an offset in seconds as an SRT timecode ``HH:MM:SS,mmm``.

    The offset is first rounded to a whole number of milliseconds and then
    split into fields with integer arithmetic. Splitting the float directly
    and truncating the remainder loses a millisecond whenever the fractional
    part is stored slightly below its decimal value (for example ``2.3``
    used to come out as ``00:00:02,299``).

    Args:
        x: Offset in seconds.

    Returns:
        The timecode string, with hours, minutes and seconds zero-padded to
        two digits and milliseconds to three.
    """
    # Work in integer milliseconds so no field depends on float remainders.
    total_ms = int(round(x * 1000))
    total_seconds, millisecond = divmod(total_ms, 1000)
    total_minutes, second = divmod(total_seconds, 60)
    hour, minute = divmod(total_minutes, 60)

    return f"{hour:02d}:{minute:02d}:{second:02d},{millisecond:03d}"

def get_video_metadata(video_url: str = "https://www.youtube.com/watch?v=21X5lGlDOfg&ab_channel=NASA")-> dict:
    """Look up a video's metadata through yt-dlp without downloading it.

    Prints the title and uploader id of the video as a progress line.

    Args:
        video_url: URL handed to ``YoutubeDL.extract_info``.

    Returns:
        The info dictionary returned by ``extract_info``.
    """
    ydl_options = {'outtmpl': '%(id)s.%(ext)s'}
    with YoutubeDL(ydl_options) as downloader:
        # download=False: only the metadata is wanted here
        metadata = downloader.extract_info(video_url, download=False)
        title = metadata.get('title', None)
        uploader = metadata.get('uploader_id', None)
        print(f"[youtube] {title}: {uploader}")
    return metadata


def parse_metadata(metadata) -> dict:
    """
    Pick the mp4 format that should be downloaded for a video.

    After a video is done recording, it will have both the livestream
    format and the mp4 format; the regular mp4 format is preferred when
    both are present.

    Selection order:
      1. the lowest format id that is listed in ``youtube_mp4_codes``;
      2. otherwise the lowest format id listed in
         ``youtube_livestream_codes`` (reported with ``is_livestream=True``);
      3. otherwise the first format whose ``ext`` is ``"mp4"``.

    Args:
        metadata: Info dictionary with a ``"formats"`` list; each entry is
            read for its ``"ext"`` and ``"format_id"`` keys.

    Returns:
        ``{"selected_id": <int or str>, "is_livestream": <bool>}``.
        ``selected_id`` is an ``int`` for a known code and the raw
        ``format_id`` value for the last-resort fallback.

    Raises:
        ValueError: If the metadata contains no mp4 format at all.
    """
    formats = metadata.get("formats") or []
    # filter for ext = mp4
    mp4_formats = [f for f in formats if f.get("ext", "") == "mp4"]

    # Parse ids one by one: a single non-numeric id must not discard the
    # numeric ones that can still be matched against the known codes.
    available_ids = set()
    for fmt in mp4_formats:
        try:
            available_ids.add(int(fmt.get("format_id", 0)))
        except (TypeError, ValueError):
            continue

    video_entries = sorted(available_ids.intersection(youtube_mp4_codes))
    if video_entries:
        # use video format id over livestream id if available
        return {"selected_id": video_entries[0], "is_livestream": False}

    # NOTE(review): picks the lowest matching id to mirror the mp4 branch;
    # confirm whether a different livestream variant is preferred.
    livestream_entries = sorted(
        available_ids.intersection(youtube_livestream_codes)
    )
    if livestream_entries:
        return {"selected_id": livestream_entries[0], "is_livestream": True}

    if mp4_formats:
        # Unknown ids only: fall back to whatever mp4 format is listed first.
        return {
            "selected_id": mp4_formats[0].get("format_id"),
            "is_livestream": False,
        }

    raise ValueError("no mp4 format found in video metadata")

def get_video(url: str, config: dict):
    """
    Capture the beginning of a media URL into a local file with ffmpeg.

    Args:
        url: Media URL passed to ffmpeg as its input.
        config: Settings dictionary. ``"filename"`` is the output path
            (default ``"livestream01.mp4"``) and ``"end"`` is passed to
            ffmpeg as the input ``t`` option (default ``"00:15:00"``).
    """
    # could delay start time by a few seconds to just sync up and capture the full video length
    # but would need to time how long it takes to fetch the video using youtube-dl and other adjustments and start a bit before
    filename = config.get("filename", "livestream01.mp4")
    end = config.get("end", "00:15:00")
    (
        ffmpeg
        .input(url, t=end)
        .output(filename)
        # Overwrite an existing output file instead of letting ffmpeg wait
        # on an interactive confirmation prompt.
        .run(overwrite_output=True)
    )

def get_all_files(url: str, end: str = "00:15:00"):
    """Download the selected format of a video to ``temp.mp4``.

    Fetches the metadata for ``url``, lets ``parse_metadata`` choose a
    format id, looks up that format's direct URL and passes it to
    ``get_video``.

    Args:
        url: Video page URL.
        end: Value forwarded to ``get_video`` as the ``"end"`` setting.

    Returns:
        The name of the file that was written (``"temp.mp4"``).
    """
    video_info = get_video_metadata(url)
    wanted_id = str(parse_metadata(video_info).get("selected_id", 0))

    # format ids in the metadata are compared as strings
    candidates = []
    for entry in video_info.get("formats", []):
        if entry.get("format_id", "") == wanted_id:
            candidates.append(entry)
    chosen = candidates[0]

    output_name = "temp.mp4"
    get_video(chosen.get("url", ""), {"filename": output_name, "end": end})
    return output_name

def get_text_from_mp3_whisper(inputType:str, mp3_file: str, url_path: str, taskName: str, srcLanguage: str) -> tuple:
    """
    Transcribe or translate a media source with Whisper and burn in subtitles.

    The source is either downloaded from ``url_path`` (``inputType == "url"``)
    or taken from the uploaded file ``mp3_file``. The Whisper segments are
    written to ``transcript.srt`` and ffmpeg is asked to render them onto the
    source as ``subtitled.mp4``.

    Args:
        inputType: ``"url"`` to download ``url_path``; any other value uses
            ``mp3_file``.
        mp3_file: Path of the uploaded media file (used when not in url mode).
        url_path: Video URL (used in url mode).
        taskName: Whisper task, passed as ``task``.
        srcLanguage: Source language, passed to Whisper as ``language``.

    Returns:
        A tuple ``(segments, srt_text, output_path)``: the Whisper segment
        list, the SRT document as a string, and the path of the subtitled
        video - or of the unmodified source media if rendering failed.

    Raises:
        ValueError: If the input required by ``inputType`` is missing.
    """
    # remove artifacts of a previous run so a stale file is never returned
    for stale_path in ("transcript.srt", "temp.mp4", "subtitled.mp4"):
        if os.path.exists(stale_path):
            os.remove(stale_path)

    # fail early, before the model is loaded, when the source is missing
    if inputType == "url":
        if not url_path:
            raise ValueError("url_path is required when inputType is 'url'")
    elif not mp3_file:
        raise ValueError("mp3_file is required when inputType is not 'url'")

    model = whisper.load_model("medium")
    transcribe_options = dict(task=taskName, language=srcLanguage)

    if inputType == "url":
        input_file = get_all_files(url_path)
        print("Retrieved the file")
        result = model.transcribe(input_file, **transcribe_options)
        print("transcribing the file")
    else:
        # In file mode the uploaded file is the media source; temp.mp4 does
        # not exist in this branch.
        input_file = mp3_file
        result = model.transcribe(input_file, **transcribe_options)

    segments = result.get("segments") or []
    lines = []
    # SRT cue numbers start at 1
    for count, segment in enumerate(segments, start=1):
        start = segment.get("start")
        end = segment.get("end")
        lines.append(f"{count}")
        lines.append(f"{second_to_timecode(start)} --> {second_to_timecode(end)}")
        lines.append(segment.get("text", "").strip())
        lines.append('')
    words = '\n'.join(lines)
    # save to transcript.srt; explicit UTF-8 so non-ASCII text does not
    # depend on the platform's default encoding
    with open("transcript.srt", "w", encoding="utf-8") as f:
        f.write(words)
    print("done transcribing")

    subtitles_file = 'transcript.srt'
    output_file = 'subtitled.mp4'
    try:
        print("attempt to output file")
        video = ffmpeg.input(input_file)
        audio = video.audio
        ffmpeg.concat(video.filter("subtitles", subtitles_file), audio, v=1, a=1).output(output_file).run()
    except Exception as e:
        # best effort: hand back the source media without burned-in subtitles
        print("failed to output file")
        print(e)
        output_file = input_file

    return segments, words, output_file
 
# Build the web UI around get_text_from_mp3_whisper and start serving it.
# The five inputs map positionally onto the function's parameters:
# inputType, mp3_file, url_path, taskName, srcLanguage.
gr.Interface(
    title = 'Download Video From url and extract text from audio', 
    fn=get_text_from_mp3_whisper, 
    inputs=[
        gr.Dropdown(["url", "file"], value="url"),
        # top-level components, consistent with gr.Dropdown; the legacy
        # gr.inputs.* namespace is deprecated
        gr.Audio(type="filepath"),
        gr.Textbox(),
        gr.Dropdown(["translate", "transcribe"], value="translate"),
        gr.Dropdown(["Japanese", "English"], value="Japanese")
    ],
    # NOTE(review): button_text / button_color do not appear among the
    # documented gr.Interface parameters - confirm they are accepted by the
    # installed Gradio version.
    button_text="Go!",
    button_color="#333333",
    outputs=[
        "json", "text", "file"
    ],
    # NOTE(review): live=True presumably re-runs fn whenever an input changes;
    # confirm that is intended given how expensive fn is.
    live=True).launch()