Spaces: Runtime error
Commit 6c164b4 • 0 Parent(s)
Duplicate from BatuhanYilmaz/Whisper-Auto-Subtitled-Video-Generator
Co-authored-by: Batuhan Yilmaz <BatuhanYilmaz@users.noreply.huggingface.co>
- .streamlit/config.toml +8 -0
- 01_🎥_Input_YouTube_Link.py +258 -0
- LICENSE +21 -0
- README.md +13 -0
- languages.py +101 -0
- packages.txt +1 -0
- pages/02_📼_Upload_Video_File.py +230 -0
- pages/03_📝_Upload_Video_File_and_Transcript.py +130 -0
- pages/04_🔊_Upload_Audio_File.py +205 -0
- requirements.txt +9 -0
- utils.py +96 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,8 @@
[theme]
primaryColor="#F63366"
backgroundColor="#FFFFFF"
secondaryBackgroundColor="#F0F2F6"
textColor="#262730"
font="sans serif"
[server]
maxUploadSize=1028
01_🎥_Input_YouTube_Link.py
ADDED
@@ -0,0 +1,258 @@
import whisper
from pytube import YouTube
import requests
import time
import streamlit as st
from streamlit_lottie import st_lottie
import numpy as np
import os
from typing import Iterator
from io import StringIO
from utils import write_vtt, write_srt
import ffmpeg
from languages import LANGUAGES

st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")

# Define a function that we can use to load lottie files from a link.
@st.cache()
def load_lottieurl(url: str):
    r = requests.get(url)
    if r.status_code != 200:
        return None
    return r.json()

col1, col2 = st.columns([1, 3])
with col1:
    lottie = load_lottieurl("https://assets8.lottiefiles.com/packages/lf20_jh9gfdye.json")
    st_lottie(lottie)

with col2:
    st.write("""
    ## Auto Subtitled Video Generator
    ##### Input a YouTube video link and get a video with subtitles.
    ###### ➠ If you want to transcribe the video in its original language, select the task as "Transcribe"
    ###### ➠ If you want to translate the subtitles to English, select the task as "Translate"
    ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)


@st.cache(allow_output_mutation=True)
def populate_metadata(link):
    yt = YouTube(link)
    author = yt.author
    title = yt.title
    description = yt.description
    thumbnail = yt.thumbnail_url
    length = yt.length
    views = yt.views
    return author, title, description, thumbnail, length, views


@st.cache(allow_output_mutation=True)
def download_video(link):
    yt = YouTube(link)
    video = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download()
    return video


def convert(seconds):
    return time.strftime("%H:%M:%S", time.gmtime(seconds))


loaded_model = whisper.load_model("base")
current_size = "None"


@st.cache(allow_output_mutation=True)
def change_model(current_size, size):
    if current_size != size:
        loaded_model = whisper.load_model(size)
        return loaded_model
    else:
        raise Exception("Model size is the same as the current size.")


@st.cache(allow_output_mutation=True)
def inference(link, loaded_model, task):
    yt = YouTube(link)
    path = yt.streams.filter(only_audio=True)[0].download(filename="audio.mp3")
    if task == "Transcribe":
        options = dict(task="transcribe", best_of=5)
        results = loaded_model.transcribe(path, **options)
        vtt = getSubs(results["segments"], "vtt", 80)
        srt = getSubs(results["segments"], "srt", 80)
        lang = results["language"]
        return results["text"], vtt, srt, lang
    elif task == "Translate":
        options = dict(task="translate", best_of=5)
        results = loaded_model.transcribe(path, **options)
        vtt = getSubs(results["segments"], "vtt", 80)
        srt = getSubs(results["segments"], "srt", 80)
        lang = results["language"]
        return results["text"], vtt, srt, lang
    else:
        raise ValueError("Task not supported")


@st.cache(allow_output_mutation=True)
def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
    segmentStream = StringIO()

    if format == 'vtt':
        write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    elif format == 'srt':
        write_srt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    else:
        raise Exception("Unknown format " + format)

    segmentStream.seek(0)
    return segmentStream.read()


def get_language_code(language):
    if language in LANGUAGES.keys():
        detected_language = LANGUAGES[language]
        return detected_language
    else:
        raise ValueError("Language not supported")


def generate_subtitled_video(video, audio, transcript):
    video_file = ffmpeg.input(video)
    audio_file = ffmpeg.input(audio)
    ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("final.mp4").run(quiet=True, overwrite_output=True)
    video_with_subs = open("final.mp4", "rb")
    return video_with_subs


def main():
    size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
    loaded_model = change_model(current_size, size)
    st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
             f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
    link = st.text_input("YouTube Link (The longer the video, the longer the processing time)")
    task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
    if task == "Transcribe":
        if st.button("Transcribe"):
            author, title, description, thumbnail, length, views = populate_metadata(link)
            results = inference(link, loaded_model, task)
            video = download_video(link)
            lang = results[3]
            detected_language = get_language_code(lang)

            col3, col4 = st.columns(2)
            col5, col6, col7, col8 = st.columns(4)
            col9, col10 = st.columns(2)
            with col3:
                st.video(video)

            # Write the results to a .txt file and download it.
            with open("transcript.txt", "w+", encoding='utf8') as f:
                f.writelines(results[0])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                datatxt = f.read()

            with open("transcript.vtt", "w+", encoding='utf8') as f:
                f.writelines(results[1])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
                datavtt = f.read()

            with open("transcript.srt", "w+", encoding='utf8') as f:
                f.writelines(results[2])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                datasrt = f.read()

            with col5:
                st.download_button(label="Download Transcript (.txt)",
                                   data=datatxt,
                                   file_name="transcript.txt")
            with col6:
                st.download_button(label="Download Transcript (.vtt)",
                                   data=datavtt,
                                   file_name="transcript.vtt")
            with col7:
                st.download_button(label="Download Transcript (.srt)",
                                   data=datasrt,
                                   file_name="transcript.srt")
            with col9:
                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
            with col10:
                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")

            with col4:
                with st.spinner("Generating Subtitled Video"):
                    video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
                    st.video(video_with_subs)
                    st.balloons()
            with col8:
                st.download_button(label="Download Subtitled Video",
                                   data=video_with_subs,
                                   file_name=f"{title} with subtitles.mp4")
    elif task == "Translate":
        if st.button("Translate to English"):
            author, title, description, thumbnail, length, views = populate_metadata(link)
            results = inference(link, loaded_model, task)
            video = download_video(link)
            lang = results[3]
            detected_language = get_language_code(lang)

            col3, col4 = st.columns(2)
            col5, col6, col7, col8 = st.columns(4)
            col9, col10 = st.columns(2)
            with col3:
                st.video(video)

            # Write the results to a .txt file and download it.
            with open("transcript.txt", "w+", encoding='utf8') as f:
                f.writelines(results[0])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                datatxt = f.read()

            with open("transcript.vtt", "w+", encoding='utf8') as f:
                f.writelines(results[1])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
                datavtt = f.read()

            with open("transcript.srt", "w+", encoding='utf8') as f:
                f.writelines(results[2])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                datasrt = f.read()
            with col5:
                st.download_button(label="Download Transcript (.txt)",
                                   data=datatxt,
                                   file_name="transcript.txt")
            with col6:
                st.download_button(label="Download Transcript (.vtt)",
                                   data=datavtt,
                                   file_name="transcript.vtt")
            with col7:
                st.download_button(label="Download Transcript (.srt)",
                                   data=datasrt,
                                   file_name="transcript.srt")
            with col9:
                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
            with col10:
                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")

            with col4:
                with st.spinner("Generating Subtitled Video"):
                    video_with_subs = generate_subtitled_video(video, "audio.mp3", "transcript.srt")
                    st.video(video_with_subs)
                    st.balloons()
            with col8:
                st.download_button(label="Download Subtitled Video",
                                   data=video_with_subs,
                                   file_name=f"{title} with subtitles.mp4")
    else:
        st.error("Please select a task.")


if __name__ == "__main__":
    main()
    st.markdown("###### Made with :heart: by [@BatuhanYılmaz](https://twitter.com/batuhan3326) [![this is an image link](https://i.imgur.com/thJhzOO.png)](https://www.buymeacoffee.com/batuhanylmz)")
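Not part of the commit, just an illustrative sketch: the Whisper-plus-write_srt pipeline that this page wires into Streamlit can also be exercised from a plain script. "audio.mp3" is an assumed local file standing in for the audio pytube downloads; model size and decode options mirror the page's defaults.

import whisper
from io import StringIO
from utils import write_srt

# "audio.mp3" is an assumed local file; replace with any audio Whisper can read.
model = whisper.load_model("base")
result = model.transcribe("audio.mp3", task="transcribe", best_of=5)

# Render the segments to SRT exactly as getSubs() does, then save to disk.
buffer = StringIO()
write_srt(result["segments"], file=buffer, maxLineWidth=80)
with open("transcript.srt", "w", encoding="utf8") as f:
    f.write(buffer.getvalue())

print(result["language"])    # detected language code, as consumed by get_language_code()
print(result["text"][:100])  # first characters of the full transcript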
LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2022 Batuhan Yılmaz

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
README.md
ADDED
@@ -0,0 +1,13 @@
---
title: Whisper-Auto-Subtitled-Video-Generator
emoji: 🎥
colorFrom: blue
colorTo: purple
sdk: streamlit
sdk_version: 1.10.0
app_file: 01_🎥_Input_YouTube_Link.py
pinned: false
duplicated_from: BatuhanYilmaz/Whisper-Auto-Subtitled-Video-Generator
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
languages.py
ADDED
@@ -0,0 +1,101 @@
LANGUAGES = {
    "en": "eng",
    "zh": "zho",
    "de": "deu",
    "es": "spa",
    "ru": "rus",
    "ko": "kor",
    "fr": "fra",
    "ja": "jpn",
    "pt": "por",
    "tr": "tur",
    "pl": "pol",
    "ca": "cat",
    "nl": "nld",
    "ar": "ara",
    "sv": "swe",
    "it": "ita",
    "id": "ind",
    "hi": "hin",
    "fi": "fin",
    "vi": "vie",
    "iw": "heb",
    "uk": "ukr",
    "el": "ell",
    "ms": "msa",
    "cs": "ces",
    "ro": "ron",
    "da": "dan",
    "hu": "hun",
    "ta": "tam",
    "no": "nor",
    "th": "tha",
    "ur": "urd",
    "hr": "hrv",
    "bg": "bul",
    "lt": "lit",
    "la": "lat",
    "mi": "mri",
    "ml": "mal",
    "cy": "cym",
    "sk": "slk",
    "te": "tel",
    "fa": "fas",
    "lv": "lav",
    "bn": "ben",
    "sr": "srp",
    "az": "aze",
    "sl": "slv",
    "kn": "kan",
    "et": "est",
    "mk": "mkd",
    "br": "bre",
    "eu": "eus",
    "is": "isl",
    "hy": "hye",
    "ne": "nep",
    "mn": "mon",
    "bs": "bos",
    "kk": "kaz",
    "sq": "sqi",
    "sw": "swa",
    "gl": "glg",
    "mr": "mar",
    "pa": "pan",
    "si": "sin",
    "km": "khm",
    "sn": "sna",
    "yo": "yor",
    "so": "som",
    "af": "afr",
    "oc": "oci",
    "ka": "kat",
    "be": "bel",
    "tg": "tgk",
    "sd": "snd",
    "gu": "guj",
    "am": "amh",
    "yi": "yid",
    "lo": "lao",
    "uz": "uzb",
    "fo": "fao",
    "ht": "hat",
    "ps": "pus",
    "tk": "tuk",
    "nn": "nno",
    "mt": "mlt",
    "sa": "san",
    "lb": "ltz",
    "my": "mya",
    "bo": "bod",
    "tl": "tgl",
    "mg": "mlg",
    "as": "asm",
    "tt": "tat",
    "haw": "haw",
    "ln": "lin",
    "ha": "hau",
    "ba": "bak",
    "jw": "jav",
    "su": "sun",
}
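For context, a small hypothetical lookup showing how the app's get_language_code helper uses this mapping: Whisper reports an ISO 639-1 style code and LANGUAGES converts it to the three-letter code used elsewhere in the app. Assumes the repo root is on the import path.

from languages import LANGUAGES

detected = "tr"  # e.g. what results["language"] returns for Turkish audio
print(LANGUAGES.get(detected, "unknown"))  # -> "tur"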
packages.txt
ADDED
@@ -0,0 +1 @@
ffmpeg
pages/02_📼_Upload_Video_File.py
ADDED
@@ -0,0 +1,230 @@
import whisper
import streamlit as st
from streamlit_lottie import st_lottie
from utils import write_vtt, write_srt
import ffmpeg
import requests
from typing import Iterator
from io import StringIO
import numpy as np
import pathlib
import os

st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")

# Define a function that we can use to load lottie files from a link.
@st.cache(allow_output_mutation=True)
def load_lottieurl(url: str):
    r = requests.get(url)
    if r.status_code != 200:
        return None
    return r.json()


APP_DIR = pathlib.Path(__file__).parent.absolute()

LOCAL_DIR = APP_DIR / "local"
LOCAL_DIR.mkdir(exist_ok=True)
save_dir = LOCAL_DIR / "output"
save_dir.mkdir(exist_ok=True)


loaded_model = whisper.load_model("base")
current_size = "None"


col1, col2 = st.columns([1, 3])
with col1:
    lottie = load_lottieurl("https://assets1.lottiefiles.com/packages/lf20_HjK9Ol.json")
    st_lottie(lottie)

with col2:
    st.write("""
    ## Auto Subtitled Video Generator
    ##### Upload a video file and get a video with subtitles.
    ###### ➠ If you want to transcribe the video in its original language, select the task as "Transcribe"
    ###### ➠ If you want to translate the subtitles to English, select the task as "Translate"
    ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)


@st.cache(allow_output_mutation=True)
def change_model(current_size, size):
    if current_size != size:
        loaded_model = whisper.load_model(size)
        return loaded_model
    else:
        raise Exception("Model size is the same as the current size.")


@st.cache(allow_output_mutation=True)
def inferecence(loaded_model, uploaded_file, task):
    with open(f"{save_dir}/input.mp4", "wb") as f:
        f.write(uploaded_file.read())
    audio = ffmpeg.input(f"{save_dir}/input.mp4")
    audio = ffmpeg.output(audio, f"{save_dir}/output.wav", acodec="pcm_s16le", ac=1, ar="16k")
    ffmpeg.run(audio, overwrite_output=True)
    if task == "Transcribe":
        options = dict(task="transcribe", best_of=5)
        results = loaded_model.transcribe(f"{save_dir}/output.wav", **options)
        vtt = getSubs(results["segments"], "vtt", 80)
        srt = getSubs(results["segments"], "srt", 80)
        lang = results["language"]
        return results["text"], vtt, srt, lang
    elif task == "Translate":
        options = dict(task="translate", best_of=5)
        results = loaded_model.transcribe(f"{save_dir}/output.wav", **options)
        vtt = getSubs(results["segments"], "vtt", 80)
        srt = getSubs(results["segments"], "srt", 80)
        lang = results["language"]
        return results["text"], vtt, srt, lang
    else:
        raise ValueError("Task not supported")


def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
    segmentStream = StringIO()

    if format == 'vtt':
        write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    elif format == 'srt':
        write_srt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    else:
        raise Exception("Unknown format " + format)

    segmentStream.seek(0)
    return segmentStream.read()


def generate_subtitled_video(video, audio, transcript):
    video_file = ffmpeg.input(video)
    audio_file = ffmpeg.input(audio)
    ffmpeg.concat(video_file.filter("subtitles", transcript), audio_file, v=1, a=1).output("final.mp4").run(quiet=True, overwrite_output=True)
    video_with_subs = open("final.mp4", "rb")
    return video_with_subs


def main():
    size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
    loaded_model = change_model(current_size, size)
    st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
             f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
    input_file = st.file_uploader("File", type=["mp4", "avi", "mov", "mkv"])
    # get the name of the input_file
    if input_file is not None:
        filename = input_file.name[:-4]
    else:
        filename = None
    task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
    if task == "Transcribe":
        if st.button("Transcribe"):
            results = inferecence(loaded_model, input_file, task)
            col3, col4 = st.columns(2)
            col5, col6, col7, col8 = st.columns(4)
            col9, col10 = st.columns(2)
            with col3:
                st.video(input_file)

            with open("transcript.txt", "w+", encoding='utf8') as f:
                f.writelines(results[0])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                datatxt = f.read()

            with open("transcript.vtt", "w+", encoding='utf8') as f:
                f.writelines(results[1])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
                datavtt = f.read()

            with open("transcript.srt", "w+", encoding='utf8') as f:
                f.writelines(results[2])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                datasrt = f.read()

            with col5:
                st.download_button(label="Download Transcript (.txt)",
                                   data=datatxt,
                                   file_name="transcript.txt")
            with col6:
                st.download_button(label="Download Transcript (.vtt)",
                                   data=datavtt,
                                   file_name="transcript.vtt")
            with col7:
                st.download_button(label="Download Transcript (.srt)",
                                   data=datasrt,
                                   file_name="transcript.srt")
            with col9:
                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
            with col10:
                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")

            with col4:
                with st.spinner("Generating Subtitled Video"):
                    video_with_subs = generate_subtitled_video(f"{save_dir}/input.mp4", f"{save_dir}/output.wav", "transcript.srt")
                    st.video(video_with_subs)
                    st.snow()
            with col8:
                st.download_button(label="Download Video with Subtitles",
                                   data=video_with_subs,
                                   file_name=f"{filename}_with_subs.mp4")
    elif task == "Translate":
        if st.button("Translate to English"):
            results = inferecence(loaded_model, input_file, task)
            col3, col4 = st.columns(2)
            col5, col6, col7, col8 = st.columns(4)
            col9, col10 = st.columns(2)
            with col3:
                st.video(input_file)

            with open("transcript.txt", "w+", encoding='utf8') as f:
                f.writelines(results[0])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                datatxt = f.read()

            with open("transcript.vtt", "w+", encoding='utf8') as f:
                f.writelines(results[1])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
                datavtt = f.read()

            with open("transcript.srt", "w+", encoding='utf8') as f:
                f.writelines(results[2])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                datasrt = f.read()

            with col5:
                st.download_button(label="Download Transcript (.txt)",
                                   data=datatxt,
                                   file_name="transcript.txt")
            with col6:
                st.download_button(label="Download Transcript (.vtt)",
                                   data=datavtt,
                                   file_name="transcript.vtt")
            with col7:
                st.download_button(label="Download Transcript (.srt)",
                                   data=datasrt,
                                   file_name="transcript.srt")
            with col9:
                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
            with col10:
                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")

            with col4:
                with st.spinner("Generating Subtitled Video"):
                    video_with_subs = generate_subtitled_video(f"{save_dir}/input.mp4", f"{save_dir}/output.wav", "transcript.srt")
                    st.video(video_with_subs)
                    st.snow()
            with col8:
                st.download_button(label="Download Video with Subtitles",
                                   data=video_with_subs,
                                   file_name=f"{filename}_with_subs.mp4")
    else:
        st.error("Please select a task.")


if __name__ == "__main__":
    main()
    st.markdown("###### Made with :heart: by [@BatuhanYılmaz](https://twitter.com/batuhan3326) [![this is an image link](https://i.imgur.com/thJhzOO.png)](https://www.buymeacoffee.com/batuhanylmz)")
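A minimal sketch (not part of the commit) of the audio-extraction step that inferecence() performs before transcription: the uploaded video is re-encoded to 16 kHz mono PCM WAV, which is what Whisper expects. The file names here are assumptions standing in for the save_dir paths used in the page.

import ffmpeg

# Assumed local file names; the page writes to save_dir/input.mp4 and save_dir/output.wav.
stream = ffmpeg.input("input.mp4")
stream = ffmpeg.output(stream, "output.wav", acodec="pcm_s16le", ac=1, ar="16k")
ffmpeg.run(stream, overwrite_output=True)  # produces 16 kHz mono PCM audio for Whisper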
pages/03_📝_Upload_Video_File_and_Transcript.py
ADDED
@@ -0,0 +1,130 @@
import streamlit as st
from streamlit_lottie import st_lottie
from utils import write_vtt, write_srt
import ffmpeg
import requests
from typing import Iterator
from io import StringIO
import numpy as np
import pathlib
import os


st.set_page_config(page_title="Auto Subtitled Video Generator", page_icon=":movie_camera:", layout="wide")

# Define a function that we can use to load lottie files from a link.
@st.cache(allow_output_mutation=True)
def load_lottieurl(url: str):
    r = requests.get(url)
    if r.status_code != 200:
        return None
    return r.json()


APP_DIR = pathlib.Path(__file__).parent.absolute()

LOCAL_DIR = APP_DIR / "local_transcript"
LOCAL_DIR.mkdir(exist_ok=True)
save_dir = LOCAL_DIR / "output"
save_dir.mkdir(exist_ok=True)


col1, col2 = st.columns([1, 3])
with col1:
    lottie = load_lottieurl("https://assets6.lottiefiles.com/packages/lf20_cjnxwrkt.json")
    st_lottie(lottie)

with col2:
    st.write("""
    ## Auto Subtitled Video Generator
    ##### ➠ Upload a video file and a transcript as .srt or .vtt file and get a video with subtitles.
    ##### ➠ Processing time will increase as the video length increases. """)


def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
    segmentStream = StringIO()

    if format == 'vtt':
        write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    elif format == 'srt':
        write_srt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    else:
        raise Exception("Unknown format " + format)

    segmentStream.seek(0)
    return segmentStream.read()


def split_video_audio(uploaded_file):
    with open(f"{save_dir}/input.mp4", "wb") as f:
        f.write(uploaded_file.read())
    audio = ffmpeg.input(f"{save_dir}/input.mp4")
    audio = ffmpeg.output(audio, f"{save_dir}/output.wav", acodec="pcm_s16le", ac=1, ar="16k")
    ffmpeg.run(audio, overwrite_output=True)


def main():
    uploaded_video = st.file_uploader("Upload Video File", type=["mp4", "avi", "mov", "mkv"])
    # get the name of the input_file
    if uploaded_video is not None:
        filename = uploaded_video.name[:-4]
    else:
        filename = None
    transcript_file = st.file_uploader("Upload Transcript File", type=["srt", "vtt"])
    if transcript_file is not None:
        transcript_name = transcript_file.name
    else:
        transcript_name = None
    if uploaded_video is not None and transcript_file is not None:
        if transcript_name[-3:] == "vtt":
            with open("uploaded_transcript.vtt", "wb") as f:
                f.writelines(transcript_file)
                f.close()
            with open(os.path.join(os.getcwd(), "uploaded_transcript.vtt"), "rb") as f:
                vtt_file = f.read()
            if st.button("Generate Video with Subtitles"):
                with st.spinner("Generating Subtitled Video"):
                    split_video_audio(uploaded_video)
                    video_file = ffmpeg.input(f"{save_dir}/input.mp4")
                    audio_file = ffmpeg.input(f"{save_dir}/output.wav")
                    ffmpeg.concat(video_file.filter("subtitles", "uploaded_transcript.vtt"), audio_file, v=1, a=1).output("final.mp4").global_args('-report').run(quiet=True, overwrite_output=True)
                    video_with_subs = open("final.mp4", "rb")
                col3, col4 = st.columns(2)
                with col3:
                    st.video(uploaded_video)
                with col4:
                    st.video(video_with_subs)
                    st.download_button(label="Download Video with Subtitles",
                                       data=video_with_subs,
                                       file_name=f"{filename}_with_subs.mp4")

        elif transcript_name[-3:] == "srt":
            with open("uploaded_transcript.srt", "wb") as f:
                f.writelines(transcript_file)
                f.close()
            with open(os.path.join(os.getcwd(), "uploaded_transcript.srt"), "rb") as f:
                srt_file = f.read()
            if st.button("Generate Video with Subtitles"):
                with st.spinner("Generating Subtitled Video"):
                    split_video_audio(uploaded_video)
                    video_file = ffmpeg.input(f"{save_dir}/input.mp4")
                    audio_file = ffmpeg.input(f"{save_dir}/output.wav")
                    ffmpeg.concat(video_file.filter("subtitles", "uploaded_transcript.srt"), audio_file, v=1, a=1).output("final.mp4").run(quiet=True, overwrite_output=True)
                    video_with_subs = open("final.mp4", "rb")
                col3, col4 = st.columns(2)
                with col3:
                    st.video(uploaded_video)
                with col4:
                    st.video(video_with_subs)
                    st.download_button(label="Download Video with Subtitles",
                                       data=video_with_subs,
                                       file_name=f"{filename}_with_subs.mp4")
        else:
            st.error("Please upload a .srt or .vtt file")
    else:
        st.info("Please upload a video file and a transcript file")


if __name__ == "__main__":
    main()
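For reference, the subtitle burn-in this page performs, reduced to a standalone sketch (not part of the commit). "input.mp4", "output.wav" and "uploaded_transcript.srt" are assumed local files; in the page they live under save_dir or the working directory.

import ffmpeg

video = ffmpeg.input("input.mp4")
audio = ffmpeg.input("output.wav")
# The "subtitles" filter renders the transcript onto the video stream; video and audio are then re-muxed.
ffmpeg.concat(video.filter("subtitles", "uploaded_transcript.srt"), audio, v=1, a=1) \
    .output("final.mp4") \
    .run(quiet=True, overwrite_output=True)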
pages/04_🔊_Upload_Audio_File.py
ADDED
@@ -0,0 +1,205 @@
import whisper
import streamlit as st
from streamlit_lottie import st_lottie
from utils import write_vtt, write_srt
import ffmpeg
import requests
from typing import Iterator
from io import StringIO
import numpy as np
import pathlib
import os

st.set_page_config(page_title="Auto Transcriber", page_icon="🔊", layout="wide")

# Define a function that we can use to load lottie files from a link.
@st.cache(allow_output_mutation=True)
def load_lottieurl(url: str):
    r = requests.get(url)
    if r.status_code != 200:
        return None
    return r.json()


APP_DIR = pathlib.Path(__file__).parent.absolute()

LOCAL_DIR = APP_DIR / "local_audio"
LOCAL_DIR.mkdir(exist_ok=True)
save_dir = LOCAL_DIR / "output"
save_dir.mkdir(exist_ok=True)


col1, col2 = st.columns([1, 3])
with col1:
    lottie = load_lottieurl("https://assets1.lottiefiles.com/packages/lf20_1xbk4d2v.json")
    st_lottie(lottie)

with col2:
    st.write("""
    ## Auto Transcriber
    ##### Input an audio file and get a transcript.
    ###### ➠ If you want to transcribe the audio in its original language, select the task as "Transcribe"
    ###### ➠ If you want to translate the transcription to English, select the task as "Translate"
    ###### I recommend starting with the base model and then experimenting with the larger models, the small and medium models often work well. """)

loaded_model = whisper.load_model("base")
current_size = "None"


@st.cache(allow_output_mutation=True)
def change_model(current_size, size):
    if current_size != size:
        loaded_model = whisper.load_model(size)
        return loaded_model
    else:
        raise Exception("Model size is the same as the current size.")

@st.cache(allow_output_mutation=True)
def inferecence(loaded_model, uploaded_file, task):
    with open(f"{save_dir}/input.mp3", "wb") as f:
        f.write(uploaded_file.read())
    audio = ffmpeg.input(f"{save_dir}/input.mp3")
    audio = ffmpeg.output(audio, f"{save_dir}/output.wav", acodec="pcm_s16le", ac=1, ar="16k")
    ffmpeg.run(audio, overwrite_output=True)
    if task == "Transcribe":
        options = dict(task="transcribe", best_of=5)
        results = loaded_model.transcribe(f"{save_dir}/output.wav", **options)
        vtt = getSubs(results["segments"], "vtt", 80)
        srt = getSubs(results["segments"], "srt", 80)
        lang = results["language"]
        return results["text"], vtt, srt, lang
    elif task == "Translate":
        options = dict(task="translate", best_of=5)
        results = loaded_model.transcribe(f"{save_dir}/output.wav", **options)
        vtt = getSubs(results["segments"], "vtt", 80)
        srt = getSubs(results["segments"], "srt", 80)
        lang = results["language"]
        return results["text"], vtt, srt, lang
    else:
        raise ValueError("Task not supported")


def getSubs(segments: Iterator[dict], format: str, maxLineWidth: int) -> str:
    segmentStream = StringIO()

    if format == 'vtt':
        write_vtt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    elif format == 'srt':
        write_srt(segments, file=segmentStream, maxLineWidth=maxLineWidth)
    else:
        raise Exception("Unknown format " + format)

    segmentStream.seek(0)
    return segmentStream.read()


def main():
    size = st.selectbox("Select Model Size (The larger the model, the more accurate the transcription will be, but it will take longer)", ["tiny", "base", "small", "medium", "large"], index=1)
    loaded_model = change_model(current_size, size)
    st.write(f"Model is {'multilingual' if loaded_model.is_multilingual else 'English-only'} "
             f"and has {sum(np.prod(p.shape) for p in loaded_model.parameters()):,} parameters.")
    input_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "m4a"])
    if input_file is not None:
        filename = input_file.name[:-4]
    else:
        filename = None
    task = st.selectbox("Select Task", ["Transcribe", "Translate"], index=0)
    if task == "Transcribe":
        if st.button("Transcribe"):
            results = inferecence(loaded_model, input_file, task)
            col3, col4 = st.columns(2)
            col5, col6, col7 = st.columns(3)
            col9, col10 = st.columns(2)

            with col3:
                st.audio(input_file)

            with open("transcript.txt", "w+", encoding='utf8') as f:
                f.writelines(results[0])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                datatxt = f.read()

            with open("transcript.vtt", "w+", encoding='utf8') as f:
                f.writelines(results[1])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
                datavtt = f.read()

            with open("transcript.srt", "w+", encoding='utf8') as f:
                f.writelines(results[2])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                datasrt = f.read()

            with col5:
                st.download_button(label="Download Transcript (.txt)",
                                   data=datatxt,
                                   file_name="transcript.txt")
            with col6:
                st.download_button(label="Download Transcript (.vtt)",
                                   data=datavtt,
                                   file_name="transcript.vtt")
            with col7:
                st.download_button(label="Download Transcript (.srt)",
                                   data=datasrt,
                                   file_name="transcript.srt")
            with col9:
                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
            with col10:
                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")

    elif task == "Translate":
        if st.button("Translate to English"):
            results = inferecence(loaded_model, input_file, task)
            col3, col4 = st.columns(2)
            col5, col6, col7 = st.columns(3)
            col9, col10 = st.columns(2)

            with col3:
                st.audio(input_file)

            with open("transcript.txt", "w+", encoding='utf8') as f:
                f.writelines(results[0])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.txt"), "rb") as f:
                datatxt = f.read()

            with open("transcript.vtt", "w+", encoding='utf8') as f:
                f.writelines(results[1])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.vtt"), "rb") as f:
                datavtt = f.read()

            with open("transcript.srt", "w+", encoding='utf8') as f:
                f.writelines(results[2])
                f.close()
            with open(os.path.join(os.getcwd(), "transcript.srt"), "rb") as f:
                datasrt = f.read()

            with col5:
                st.download_button(label="Download Transcript (.txt)",
                                   data=datatxt,
                                   file_name="transcript.txt")
            with col6:
                st.download_button(label="Download Transcript (.vtt)",
                                   data=datavtt,
                                   file_name="transcript.vtt")
            with col7:
                st.download_button(label="Download Transcript (.srt)",
                                   data=datasrt,
                                   file_name="transcript.srt")
            with col9:
                st.success("You can download the transcript in .srt format, edit it (if you need to) and upload it to YouTube to create subtitles for your video.")
            with col10:
                st.info("Streamlit refreshes after the download button is clicked. The data is cached so you can download the transcript again without having to transcribe the video again.")

    else:
        st.error("Please select a task.")


if __name__ == "__main__":
    main()
    st.markdown("###### Made with :heart: by [@BatuhanYılmaz](https://twitter.com/batuhan3326) [![this is an image link](https://i.imgur.com/thJhzOO.png)](https://www.buymeacoffee.com/batuhanylmz)")
requirements.txt
ADDED
@@ -0,0 +1,9 @@
git+https://github.com/openai/whisper.git
ffmpeg==1.4
ffmpeg_python==0.2.0
numpy==1.23.3
pytube==12.1.0
requests==2.28.1
streamlit==1.13.0
streamlit_lottie==0.0.3
whisper
utils.py
ADDED
@@ -0,0 +1,96 @@
import textwrap
import zlib
from typing import Iterator, TextIO


def exact_div(x, y):
    assert x % y == 0
    return x // y


def str2bool(string):
    str2val = {"True": True, "False": False}
    if string in str2val:
        return str2val[string]
    else:
        raise ValueError(f"Expected one of {set(str2val.keys())}, got {string}")


def optional_int(string):
    return None if string == "None" else int(string)


def optional_float(string):
    return None if string == "None" else float(string)


def compression_ratio(text) -> float:
    return len(text) / len(zlib.compress(text.encode("utf-8")))


def format_timestamp(seconds: float, always_include_hours: bool = False, fractionalSeperator: str = '.'):
    assert seconds >= 0, "non-negative timestamp expected"
    milliseconds = round(seconds * 1000.0)

    hours = milliseconds // 3_600_000
    milliseconds -= hours * 3_600_000

    minutes = milliseconds // 60_000
    milliseconds -= minutes * 60_000

    seconds = milliseconds // 1_000
    milliseconds -= seconds * 1_000

    hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else ""
    return f"{hours_marker}{minutes:02d}:{seconds:02d}{fractionalSeperator}{milliseconds:03d}"


def write_txt(transcript: Iterator[dict], file: TextIO):
    for segment in transcript:
        print(segment['text'].strip(), file=file, flush=True)


def write_vtt(transcript: Iterator[dict], file: TextIO, maxLineWidth=None):
    print("WEBVTT\n", file=file)
    for segment in transcript:
        text = processText(segment['text'], maxLineWidth).replace('-->', '->')

        print(
            f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
            f"{text}\n",
            file=file,
            flush=True,
        )


def write_srt(transcript: Iterator[dict], file: TextIO, maxLineWidth=None):
    """
    Write a transcript to a file in SRT format.
    Example usage:
        from pathlib import Path
        from whisper.utils import write_srt
        result = transcribe(model, audio_path, temperature=temperature, **args)
        # save SRT
        audio_basename = Path(audio_path).stem
        with open(Path(output_dir) / (audio_basename + ".srt"), "w", encoding="utf-8") as srt:
            write_srt(result["segments"], file=srt)
    """
    for i, segment in enumerate(transcript, start=1):
        text = processText(segment['text'].strip(), maxLineWidth).replace('-->', '->')

        # write srt lines
        print(
            f"{i}\n"
            f"{format_timestamp(segment['start'], always_include_hours=True, fractionalSeperator=',')} --> "
            f"{format_timestamp(segment['end'], always_include_hours=True, fractionalSeperator=',')}\n"
            f"{text}\n",
            file=file,
            flush=True,
        )


def processText(text: str, maxLineWidth=None):
    if (maxLineWidth is None or maxLineWidth < 0):
        return text

    lines = textwrap.wrap(text, width=maxLineWidth, tabsize=4)
    return '\n'.join(lines)
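A quick, hypothetical usage check of the helpers above: hand-made segments run through write_srt, plus format_timestamp with the SRT-style comma separator. The segment values are illustrative, not real transcription output.

from io import StringIO
from utils import write_srt, format_timestamp

# Hand-made segments purely for illustration; real ones come from Whisper's results["segments"].
segments = [
    {"start": 0.0, "end": 2.5, "text": " Hello there."},
    {"start": 2.5, "end": 6.0, "text": " Lines longer than maxLineWidth are wrapped by processText."},
]
out = StringIO()
write_srt(segments, file=out, maxLineWidth=40)
print(out.getvalue())

print(format_timestamp(3661.5, always_include_hours=True, fractionalSeperator=","))  # 01:01:01,500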