diff --git a/app.py b/app.py index bf46e6a36653108c1bfbaaf483eb8bd14b952885..0c09042a7935d06bd1b2081b68f24668f252f25f 100644 --- a/app.py +++ b/app.py @@ -1,55 +1,51 @@ -# main.py - -from fastapi import FastAPI +import gradio as gr from PIL import Image -import base64 -from fastapi.responses import HTMLResponse, FileResponse - -app = FastAPI() - - -@app.get("/") -async def root(): - return FileResponse(path="static/index.html", media_type="text/html") - -@app.get("/html") -async def root(): - """Basic HTML response.""" - body = ( - "" - "" - "
Welcome to the API" -        "Check the docs: here
" - "" - "" - ) - - return HTMLResponse(content=body) - -@app.get("/api") -async def cal_api(): +import os +import summarizer as su +import nltk + + +def image_mod(rpunkt_switch, link): + + if len(link)==0: + return 'Error: No link provided', None + + nltk_file = 'nltk_data/tokenizers/punkt.zip' + home_pc = '/Users/hujo/' + home_hf = '/home/user/' + if os.path.exists(home_pc+nltk_file) or os.path.exists(home_hf+nltk_file): + print('nltk punkt file exists in ', nltk_file) + else: + nltk.download('punkt') + + #link = 'https://www.youtube.com/watch?v=lCnHfTHkhbE' + lexrank_switch = True + html = '' images = [] + html, images = su.getSummary(link, lexrank_switch, rpunkt_switch) + #images = su.getSummaryImage(link, lexrank_switch, rpunkt_switch) + print(html) + + files = os.listdir('workdir/') + print('local files: ',files) - with open('workdir/lion.jpg', 'rb') as open_file: - byte_content = open_file.read() - base64_bytes = base64.b64encode(byte_content) - base64_string = base64_bytes.decode('utf-8') - images.append(base64_string) + #image_path = 'workdir/lion.jpg' + #im = Image.open(image_path) + #images.append(im) + #with Image.open(open(image_path,'rb')) as im: + # images.append(im) + #images.append(im.rotate(90)) + + #images[0].save("newlion.png") + + print('images',images) - with open('workdir/cheetah.jpg', 'rb') as open_file: - byte_content = open_file.read() - base64_bytes = base64.b64encode(byte_content) - base64_string = base64_bytes.decode('utf-8') - images.append(base64_string) + return html, images - #image_path='lion.jpg' - #pilim = Image.open(image_path) - #pilimrot = pilim.rotate(45) - return {"data": images} -@app.get("/items/{item_id}") -async def read_item(item_id): - return {"item_id": item_id} +demo = gr.Interface(image_mod, + [gr.Checkbox(label='Restore runctuation'), "text"] , ["html", gr.Gallery()], + allow_flagging="never") +if __name__ == "__main__": + demo.launch() diff --git a/frames.py b/frames.py new file mode 100644 index 0000000000000000000000000000000000000000..043585fdb6a24ebb279d826f36adab02cf402a45 --- /dev/null +++ b/frames.py @@ -0,0 +1,102 @@ +from ast import Try +import subprocess as sp +import os + +# show current venv: echo $VIRTUAL_ENV +# import sys +# del sys.modules['frames'] + +# transcript module +# 1. extract timestamps from transcript +# 2. extract captions from transcript +# this module +# 3. extract frames at timestamps +# 4. add caption to each frame +# 5. convert images to mp4 video + +# converts a list of images to a mp4 video +def convertImageToVideo(): + cmd = "ffmpeg -y -f image2 -i frame_%04d.jpg output_video.mp4" + cmd_call = cmd.split() + working_dir = './workdir' + + with sp.Popen(cmd_call,cwd=working_dir, stderr=sp.PIPE) as proc: + result = proc.stderr.read() + + return [proc.wait(),result] + + +# extract a frame as jpg image file +# from a video at a given timestamp +# num=0; for p in $(cat timestamps); do ((num++)); printf "$num $p\r"; dnum=$(printf "%03d" "$num"); ffmpeg -ss $p -i "$mp4file" -frames:v 1 out_$dnum.jpg >& ffmpeg.out; done +def extractImagesFromVideo(timestamps): + working_dir = './workdir' + input_file = 'input_video.mp4' + if not os.path.isfile(working_dir+'/'+input_file): + return 'Error: File '+input_file+' is missing, create the file first.' + + + # create a working directory for the files + if not os.path.isdir(working_dir): + print('There is no working directory. 
Create a new one.') + os.mkdir(working_dir) + + proc_list = [] + for current_frame, current_timestamp in enumerate(timestamps, start=1): + print(f"{current_frame:04d}", current_timestamp) + cmd = 'ffmpeg -y -ss '+str(current_timestamp)+' -i '+input_file+' -frames:v 1 frame_'+f"{current_frame:04d}"+'.jpg' + cmd_call = cmd.split() + + with sp.Popen(cmd_call,cwd=working_dir, stderr=sp.PIPE) as proc: + proc_list.append(proc.wait()) + + return proc_list + +# add caption to each image +# 'convert' porgram is from the 'imagemagick' package +# num=0; while read p; do ((num++)); dnum=$(printf "%03d" "$num"); printf "$dnum $p\r"; convert out_$dnum.jpg -undercolor Black -fill white -gravity South -pointsize 25 -annotate +0+10 "$p" out_$dnum.jpg >& ffmpeg.out; done 10: + lang = detect(text) + if lang != 'en': + raise Exception(F"""Non English text detected. Restore Punctuation works only for English. + If you are certain the input is English, pass argument lang='en' to this function. + Punctuate received: {text}""") + + # plit up large text into bert digestable chunks + splits = self.split_on_toks(text, self.wrds_per_pred, self.overlap_wrds) + # predict slices + # full_preds_lst contains tuple of labels and logits + full_preds_lst = [self.predict(i['text']) for i in splits] + # extract predictions, and discard logits + preds_lst = [i[0][0] for i in full_preds_lst] + # join text slices + combined_preds = self.combine_results(text, preds_lst) + # create punctuated prediction + punct_text = self.punctuate_texts(combined_preds) + return punct_text + + def predict(self, input_slice): + """ + Passes the unpunctuated text to the model for punctuation. + """ + predictions, raw_outputs = self.model.predict([input_slice]) + return predictions, raw_outputs + + @staticmethod + def split_on_toks(text, length, overlap): + """ + Splits text into predefined slices of overlapping text with indexes (offsets) + that tie-back to original text. + This is done to bypass 512 token limit on transformer models by sequentially + feeding chunks of < 512 toks. + Example output: + [{...}, {"text": "...", 'start_idx': 31354, 'end_idx': 32648}, {...}] + """ + wrds = text.replace('\n', ' ').split(" ") + resp = [] + lst_chunk_idx = 0 + i = 0 + + while True: + # words in the chunk and the overlapping portion + wrds_len = wrds[(length * i):(length * (i + 1))] + wrds_ovlp = wrds[(length * (i + 1)):((length * (i + 1)) + overlap)] + wrds_split = wrds_len + wrds_ovlp + + # Break loop if no more words + if not wrds_split: + break + + wrds_str = " ".join(wrds_split) + nxt_chunk_start_idx = len(" ".join(wrds_len)) + lst_char_idx = len(" ".join(wrds_split)) + + resp_obj = { + "text": wrds_str, + "start_idx": lst_chunk_idx, + "end_idx": lst_char_idx + lst_chunk_idx, + } + + resp.append(resp_obj) + lst_chunk_idx += nxt_chunk_start_idx + 1 + i += 1 + logging.info(f"Sliced transcript into {len(resp)} slices.") + return resp + + @staticmethod + def combine_results(full_text: str, text_slices): + """ + Given a full text and predictions of each slice combines predictions into a single text again. 
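+        Example output (illustrative): a list of (word, label) tuples such as
+        [("hello", "OU"), ("world", ".O")], where the label encodes the
+        predicted punctuation and casing for each word.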
+ Performs validataion wether text was combined correctly + """ + split_full_text = full_text.replace('\n', ' ').split(" ") + split_full_text = [i for i in split_full_text if i] + split_full_text_len = len(split_full_text) + output_text = [] + index = 0 + + if len(text_slices[-1]) <= 3 and len(text_slices) > 1: + text_slices = text_slices[:-1] + + for _slice in text_slices: + slice_wrds = len(_slice) + for ix, wrd in enumerate(_slice): + # print(index, "|", str(list(wrd.keys())[0]), "|", split_full_text[index]) + if index == split_full_text_len: + break + + if split_full_text[index] == str(list(wrd.keys())[0]) and \ + ix <= slice_wrds - 3 and text_slices[-1] != _slice: + index += 1 + pred_item_tuple = list(wrd.items())[0] + output_text.append(pred_item_tuple) + elif split_full_text[index] == str(list(wrd.keys())[0]) and text_slices[-1] == _slice: + index += 1 + pred_item_tuple = list(wrd.items())[0] + output_text.append(pred_item_tuple) + assert [i[0] for i in output_text] == split_full_text + return output_text + + @staticmethod + def punctuate_texts(full_pred: list): + """ + Given a list of Predictions from the model, applies the predictions to text, + thus punctuating it. + """ + punct_resp = "" + for i in full_pred: + word, label = i + if label[-1] == "U": + punct_wrd = word.capitalize() + else: + punct_wrd = word + + if label[0] != "O": + punct_wrd += label[0] + + punct_resp += punct_wrd + " " + punct_resp = punct_resp.strip() + # Append trailing period if doesnt exist. + if punct_resp[-1].isalnum(): + punct_resp += "." + return punct_resp + + +if __name__ == "__main__": + punct_model = RestorePuncts() + # read test file + with open('../tests/sample_text.txt', 'r') as fp: + test_sample = fp.read() + # predict text and print + punctuated = punct_model.punctuate(test_sample) + print(punctuated) diff --git a/myrpunct/utils.py b/myrpunct/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..77e88f9bfbded47ca0929abf5dc5686e49d674ea --- /dev/null +++ b/myrpunct/utils.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# 💾⚙️🔮 + +__author__ = "Daulet N." +__email__ = "daulet.nurmanbetov@gmail.com" + +def prepare_unpunct_text(text): + """ + Given a text, normalizes it to subsequently restore punctuation + """ + formatted_txt = text.replace('\n', '').strip() + formatted_txt = formatted_txt.lower() + formatted_txt_lst = formatted_txt.split(" ") + punct_strp_txt = [strip_punct(i) for i in formatted_txt_lst] + normalized_txt = " ".join([i for i in punct_strp_txt if i]) + return normalized_txt + +def strip_punct(wrd): + """ + Given a word, strips non aphanumeric characters that precede and follow it + """ + if not wrd: + return wrd + + while not wrd[-1:].isalnum(): + if not wrd: + break + wrd = wrd[:-1] + + while not wrd[:1].isalnum(): + if not wrd: + break + wrd = wrd[1:] + return wrd diff --git a/pytube/__init__.py b/pytube/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4eaa1b2136cdeca46724e46d542a764707c41532 --- /dev/null +++ b/pytube/__init__.py @@ -0,0 +1,19 @@ +# flake8: noqa: F401 +# noreorder +""" +Pytube: a very serious Python library for downloading YouTube Videos. 
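+
+Example (illustrative):
+
+>>> from pytube import YouTube
+>>> YouTube('https://youtube.com/watch?v=2lAe1cqCOXo').streams.first().download()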
+""" +__title__ = "pytube" +__author__ = "Ronnie Ghose, Taylor Fox Dahlin, Nick Ficano" +__license__ = "The Unlicense (Unlicense)" +__js__ = None +__js_url__ = None + +from pytube.version import __version__ +from pytube.streams import Stream +from pytube.captions import Caption +from pytube.query import CaptionQuery, StreamQuery +from pytube.__main__ import YouTube +from pytube.contrib.playlist import Playlist +from pytube.contrib.channel import Channel +from pytube.contrib.search import Search diff --git a/pytube/__main__.py b/pytube/__main__.py new file mode 100644 index 0000000000000000000000000000000000000000..30e98e29104b37dc7ab37a03590641ea7ca378ff --- /dev/null +++ b/pytube/__main__.py @@ -0,0 +1,467 @@ +""" +This module implements the core developer interface for pytube. + +The problem domain of the :class:`YouTube class focuses almost +exclusively on the developer interface. Pytube offloads the heavy lifting to +smaller peripheral modules and functions. + +""" +import logging +from typing import Any, Callable, Dict, List, Optional + +import pytube +import pytube.exceptions as exceptions +from pytube import extract, request +from pytube import Stream, StreamQuery +from pytube.helpers import install_proxy +from pytube.innertube import InnerTube +from pytube.metadata import YouTubeMetadata +from pytube.monostate import Monostate + +logger = logging.getLogger(__name__) + + +class YouTube: + """Core developer interface for pytube.""" + + def __init__( + self, + url: str, + on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None, + on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None, + proxies: Dict[str, str] = None, + use_oauth: bool = False, + allow_oauth_cache: bool = True + ): + """Construct a :class:`YouTube `. + + :param str url: + A valid YouTube watch URL. + :param func on_progress_callback: + (Optional) User defined callback function for stream download + progress events. + :param func on_complete_callback: + (Optional) User defined callback function for stream download + complete events. + :param dict proxies: + (Optional) A dict mapping protocol to proxy address which will be used by pytube. + :param bool use_oauth: + (Optional) Prompt the user to authenticate to YouTube. + If allow_oauth_cache is set to True, the user should only be prompted once. + :param bool allow_oauth_cache: + (Optional) Cache OAuth tokens locally on the machine. Defaults to True. + These tokens are only generated if use_oauth is set to True as well. + """ + self._js: Optional[str] = None # js fetched by js_url + self._js_url: Optional[str] = None # the url to the js, parsed from watch html + + self._vid_info: Optional[Dict] = None # content fetched from innertube/player + + self._watch_html: Optional[str] = None # the html of /watch?v= + self._embed_html: Optional[str] = None + self._player_config_args: Optional[Dict] = None # inline js in the html containing + self._age_restricted: Optional[bool] = None + + self._fmt_streams: Optional[List[Stream]] = None + + self._initial_data = None + self._metadata: Optional[YouTubeMetadata] = None + + # video_id part of /watch?v= + self.video_id = extract.video_id(url) + + self.watch_url = f"https://youtube.com/watch?v={self.video_id}" + self.embed_url = f"https://www.youtube.com/embed/{self.video_id}" + + # Shared between all instances of `Stream` (Borg pattern). 
+ self.stream_monostate = Monostate( + on_progress=on_progress_callback, on_complete=on_complete_callback + ) + + if proxies: + install_proxy(proxies) + + self._author = None + self._title = None + self._publish_date = None + + self.use_oauth = use_oauth + self.allow_oauth_cache = allow_oauth_cache + + def __repr__(self): + return f'' + + def __eq__(self, o: object) -> bool: + # Compare types and urls, if they're same return true, else return false. + return type(o) == type(self) and o.watch_url == self.watch_url + + @property + def watch_html(self): + if self._watch_html: + return self._watch_html + self._watch_html = request.get(url=self.watch_url) + return self._watch_html + + @property + def embed_html(self): + if self._embed_html: + return self._embed_html + self._embed_html = request.get(url=self.embed_url) + return self._embed_html + + @property + def age_restricted(self): + if self._age_restricted: + return self._age_restricted + self._age_restricted = extract.is_age_restricted(self.watch_html) + return self._age_restricted + + @property + def js_url(self): + if self._js_url: + return self._js_url + + if self.age_restricted: + self._js_url = extract.js_url(self.embed_html) + else: + self._js_url = extract.js_url(self.watch_html) + + return self._js_url + + @property + def js(self): + if self._js: + return self._js + + # If the js_url doesn't match the cached url, fetch the new js and update + # the cache; otherwise, load the cache. + if pytube.__js_url__ != self.js_url: + self._js = request.get(self.js_url) + pytube.__js__ = self._js + pytube.__js_url__ = self.js_url + else: + self._js = pytube.__js__ + + return self._js + + @property + def initial_data(self): + if self._initial_data: + return self._initial_data + self._initial_data = extract.initial_data(self.watch_html) + return self._initial_data + + @property + def streaming_data(self): + """Return streamingData from video info.""" + if 'streamingData' in self.vid_info: + return self.vid_info['streamingData'] + else: + self.bypass_age_gate() + return self.vid_info['streamingData'] + + @property + def fmt_streams(self): + """Returns a list of streams if they have been initialized. + + If the streams have not been initialized, finds all relevant + streams and initializes them. + """ + self.check_availability() + if self._fmt_streams: + return self._fmt_streams + + self._fmt_streams = [] + + stream_manifest = extract.apply_descrambler(self.streaming_data) + + # If the cached js doesn't work, try fetching a new js file + # https://github.com/pytube/pytube/issues/1054 + try: + extract.apply_signature(stream_manifest, self.vid_info, self.js) + except exceptions.ExtractError: + # To force an update to the js file, we clear the cache and retry + self._js = None + self._js_url = None + pytube.__js__ = None + pytube.__js_url__ = None + extract.apply_signature(stream_manifest, self.vid_info, self.js) + + # build instances of :class:`Stream ` + # Initialize stream objects + for stream in stream_manifest: + video = Stream( + stream=stream, + monostate=self.stream_monostate, + ) + self._fmt_streams.append(video) + + self.stream_monostate.title = self.title + self.stream_monostate.duration = self.length + + return self._fmt_streams + + def check_availability(self): + """Check whether the video is available. + + Raises different exceptions based on why the video is unavailable, + otherwise does nothing. 
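+
+        Example (illustrative; assumes ``yt`` is a YouTube instance):
+
+        >>> try:
+        ...     yt.check_availability()
+        ... except exceptions.VideoPrivate:
+        ...     print('video is private')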
+ """ + status, messages = extract.playability_status(self.watch_html) + + for reason in messages: + if status == 'UNPLAYABLE': + if reason == ( + 'Join this channel to get access to members-only content ' + 'like this video, and other exclusive perks.' + ): + raise exceptions.MembersOnly(video_id=self.video_id) + elif reason == 'This live stream recording is not available.': + raise exceptions.RecordingUnavailable(video_id=self.video_id) + else: + raise exceptions.VideoUnavailable(video_id=self.video_id) + elif status == 'LOGIN_REQUIRED': + if reason == ( + 'This is a private video. ' + 'Please sign in to verify that you may see it.' + ): + raise exceptions.VideoPrivate(video_id=self.video_id) + elif status == 'ERROR': + if reason == 'Video unavailable': + raise exceptions.VideoUnavailable(video_id=self.video_id) + elif status == 'LIVE_STREAM': + raise exceptions.LiveStreamError(video_id=self.video_id) + + @property + def vid_info(self): + """Parse the raw vid info and return the parsed result. + + :rtype: Dict[Any, Any] + """ + if self._vid_info: + return self._vid_info + + innertube = InnerTube(use_oauth=self.use_oauth, allow_cache=self.allow_oauth_cache) + + innertube_response = innertube.player(self.video_id) + self._vid_info = innertube_response + return self._vid_info + + def bypass_age_gate(self): + """Attempt to update the vid_info by bypassing the age gate.""" + innertube = InnerTube( + client='ANDROID_EMBED', + use_oauth=self.use_oauth, + allow_cache=self.allow_oauth_cache + ) + innertube_response = innertube.player(self.video_id) + + playability_status = innertube_response['playabilityStatus'].get('status', None) + + # If we still can't access the video, raise an exception + # (tier 3 age restriction) + if playability_status == 'UNPLAYABLE': + raise exceptions.AgeRestrictedError(self.video_id) + + self._vid_info = innertube_response + + @property + def caption_tracks(self) -> List[pytube.Caption]: + """Get a list of :class:`Caption `. + + :rtype: List[Caption] + """ + raw_tracks = ( + self.vid_info.get("captions", {}) + .get("playerCaptionsTracklistRenderer", {}) + .get("captionTracks", []) + ) + return [pytube.Caption(track) for track in raw_tracks] + + @property + def captions(self) -> pytube.CaptionQuery: + """Interface to query caption tracks. + + :rtype: :class:`CaptionQuery `. + """ + return pytube.CaptionQuery(self.caption_tracks) + + @property + def streams(self) -> StreamQuery: + """Interface to query both adaptive (DASH) and progressive streams. + + :rtype: :class:`StreamQuery `. + """ + self.check_availability() + return StreamQuery(self.fmt_streams) + + @property + def thumbnail_url(self) -> str: + """Get the thumbnail url image. + + :rtype: str + """ + thumbnail_details = ( + self.vid_info.get("videoDetails", {}) + .get("thumbnail", {}) + .get("thumbnails") + ) + if thumbnail_details: + thumbnail_details = thumbnail_details[-1] # last item has max size + return thumbnail_details["url"] + + return f"https://img.youtube.com/vi/{self.video_id}/maxresdefault.jpg" + + @property + def publish_date(self): + """Get the publish date. + + :rtype: datetime + """ + if self._publish_date: + return self._publish_date + self._publish_date = extract.publish_date(self.watch_html) + return self._publish_date + + @publish_date.setter + def publish_date(self, value): + """Sets the publish date.""" + self._publish_date = value + + @property + def title(self) -> str: + """Get the video title. 
+ + :rtype: str + """ + if self._title: + return self._title + + try: + self._title = self.vid_info['videoDetails']['title'] + except KeyError: + # Check_availability will raise the correct exception in most cases + # if it doesn't, ask for a report. + self.check_availability() + raise exceptions.PytubeError( + ( + f'Exception while accessing title of {self.watch_url}. ' + 'Please file a bug report at https://github.com/pytube/pytube' + ) + ) + + return self._title + + @title.setter + def title(self, value): + """Sets the title value.""" + self._title = value + + @property + def description(self) -> str: + """Get the video description. + + :rtype: str + """ + return self.vid_info.get("videoDetails", {}).get("shortDescription") + + @property + def rating(self) -> float: + """Get the video average rating. + + :rtype: float + + """ + return self.vid_info.get("videoDetails", {}).get("averageRating") + + @property + def length(self) -> int: + """Get the video length in seconds. + + :rtype: int + """ + return int(self.vid_info.get('videoDetails', {}).get('lengthSeconds')) + + @property + def views(self) -> int: + """Get the number of the times the video has been viewed. + + :rtype: int + """ + return int(self.vid_info.get("videoDetails", {}).get("viewCount")) + + @property + def author(self) -> str: + """Get the video author. + :rtype: str + """ + if self._author: + return self._author + self._author = self.vid_info.get("videoDetails", {}).get( + "author", "unknown" + ) + return self._author + + @author.setter + def author(self, value): + """Set the video author.""" + self._author = value + + @property + def keywords(self) -> List[str]: + """Get the video keywords. + + :rtype: List[str] + """ + return self.vid_info.get('videoDetails', {}).get('keywords', []) + + @property + def channel_id(self) -> str: + """Get the video poster's channel id. + + :rtype: str + """ + return self.vid_info.get('videoDetails', {}).get('channelId', None) + + @property + def channel_url(self) -> str: + """Construct the channel url for the video's poster from the channel id. + + :rtype: str + """ + return f'https://www.youtube.com/channel/{self.channel_id}' + + @property + def metadata(self) -> Optional[YouTubeMetadata]: + """Get the metadata for the video. + + :rtype: YouTubeMetadata + """ + if self._metadata: + return self._metadata + else: + self._metadata = extract.metadata(self.initial_data) + return self._metadata + + def register_on_progress_callback(self, func: Callable[[Any, bytes, int], None]): + """Register a download progress callback function post initialization. + + :param callable func: + A callback function that takes ``stream``, ``chunk``, + and ``bytes_remaining`` as parameters. + + :rtype: None + + """ + self.stream_monostate.on_progress = func + + def register_on_complete_callback(self, func: Callable[[Any, Optional[str]], None]): + """Register a download complete callback function post initialization. + + :param callable func: + A callback function that takes ``stream`` and ``file_path``. 
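+
+        Example (illustrative; assumes ``yt`` is a YouTube instance):
+
+        >>> def on_complete(stream, file_path):
+        ...     print('saved to', file_path)
+        >>> yt.register_on_complete_callback(on_complete)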
+ + :rtype: None + + """ + self.stream_monostate.on_complete = func diff --git a/pytube/__pycache__/__init__.cpython-310.pyc b/pytube/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..84df328c97072dca22bd94ba9baf25218d48fa19 Binary files /dev/null and b/pytube/__pycache__/__init__.cpython-310.pyc differ diff --git a/pytube/__pycache__/__init__.cpython-39.pyc b/pytube/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..24ac25f376bc8f968311fa278f3b85eed7fe358f Binary files /dev/null and b/pytube/__pycache__/__init__.cpython-39.pyc differ diff --git a/pytube/__pycache__/__main__.cpython-310.pyc b/pytube/__pycache__/__main__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..53f42fd7eb921301c84e6ec8bb940c8c7fa3958a Binary files /dev/null and b/pytube/__pycache__/__main__.cpython-310.pyc differ diff --git a/pytube/__pycache__/__main__.cpython-39.pyc b/pytube/__pycache__/__main__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..02f607e04639c444259a17281ee72e7ae7eec83f Binary files /dev/null and b/pytube/__pycache__/__main__.cpython-39.pyc differ diff --git a/pytube/__pycache__/captions.cpython-310.pyc b/pytube/__pycache__/captions.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ddd7f91b1518ccec73d418e192dbc94ec71da310 Binary files /dev/null and b/pytube/__pycache__/captions.cpython-310.pyc differ diff --git a/pytube/__pycache__/captions.cpython-39.pyc b/pytube/__pycache__/captions.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1e8f189f4ff7582ecc55e0e141d53e96b8c51d20 Binary files /dev/null and b/pytube/__pycache__/captions.cpython-39.pyc differ diff --git a/pytube/__pycache__/cipher.cpython-310.pyc b/pytube/__pycache__/cipher.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6511343c9f3b6d5ac241ef6717fb414060236302 Binary files /dev/null and b/pytube/__pycache__/cipher.cpython-310.pyc differ diff --git a/pytube/__pycache__/cipher.cpython-39.pyc b/pytube/__pycache__/cipher.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..39943e899c5b55ee337acba1e8100437de47dd8e Binary files /dev/null and b/pytube/__pycache__/cipher.cpython-39.pyc differ diff --git a/pytube/__pycache__/exceptions.cpython-310.pyc b/pytube/__pycache__/exceptions.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..32a7fa8879bdff838155b800052e283d8e031abb Binary files /dev/null and b/pytube/__pycache__/exceptions.cpython-310.pyc differ diff --git a/pytube/__pycache__/exceptions.cpython-39.pyc b/pytube/__pycache__/exceptions.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..445f03922e73c716d27c5cf82aef14d6394aceac Binary files /dev/null and b/pytube/__pycache__/exceptions.cpython-39.pyc differ diff --git a/pytube/__pycache__/extract.cpython-310.pyc b/pytube/__pycache__/extract.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..eef71d15ea99f7b2ce9bdabd4d3894835bc6ce41 Binary files /dev/null and b/pytube/__pycache__/extract.cpython-310.pyc differ diff --git a/pytube/__pycache__/extract.cpython-39.pyc b/pytube/__pycache__/extract.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..222522ec15e32abd3e6c0fdcb6df2c76ef7eb08c Binary files /dev/null and b/pytube/__pycache__/extract.cpython-39.pyc 
differ diff --git a/pytube/__pycache__/helpers.cpython-310.pyc b/pytube/__pycache__/helpers.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f9feebcb11892130cad8ed7f69fa59484d65cf4e Binary files /dev/null and b/pytube/__pycache__/helpers.cpython-310.pyc differ diff --git a/pytube/__pycache__/helpers.cpython-39.pyc b/pytube/__pycache__/helpers.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1fec63e34d3ce39e5c70766497a5e747a2a71c9d Binary files /dev/null and b/pytube/__pycache__/helpers.cpython-39.pyc differ diff --git a/pytube/__pycache__/innertube.cpython-310.pyc b/pytube/__pycache__/innertube.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..826b415b90c19a734bcb3f621779f6eed3cb26ff Binary files /dev/null and b/pytube/__pycache__/innertube.cpython-310.pyc differ diff --git a/pytube/__pycache__/innertube.cpython-39.pyc b/pytube/__pycache__/innertube.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3da0d221cd74b1ebbf3593a75d225665580fc87a Binary files /dev/null and b/pytube/__pycache__/innertube.cpython-39.pyc differ diff --git a/pytube/__pycache__/itags.cpython-310.pyc b/pytube/__pycache__/itags.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..022a2505df1ebc43c2530118a99a8470c2e67341 Binary files /dev/null and b/pytube/__pycache__/itags.cpython-310.pyc differ diff --git a/pytube/__pycache__/itags.cpython-39.pyc b/pytube/__pycache__/itags.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..565ab774167f16e3e718fdd215a5beb08158a50c Binary files /dev/null and b/pytube/__pycache__/itags.cpython-39.pyc differ diff --git a/pytube/__pycache__/metadata.cpython-310.pyc b/pytube/__pycache__/metadata.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..494ee1cf075e84d51a1b63efc1f0d3a54f3e5e7e Binary files /dev/null and b/pytube/__pycache__/metadata.cpython-310.pyc differ diff --git a/pytube/__pycache__/metadata.cpython-39.pyc b/pytube/__pycache__/metadata.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bd0b5da02bfb1fa8fec2ffd4f8c703feb77423b1 Binary files /dev/null and b/pytube/__pycache__/metadata.cpython-39.pyc differ diff --git a/pytube/__pycache__/monostate.cpython-310.pyc b/pytube/__pycache__/monostate.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6a6b92092689f4f2151b3318b1f38e62aecc6414 Binary files /dev/null and b/pytube/__pycache__/monostate.cpython-310.pyc differ diff --git a/pytube/__pycache__/monostate.cpython-39.pyc b/pytube/__pycache__/monostate.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0ce8bd127834f4bc62a7212cfd725e7ef0f21d79 Binary files /dev/null and b/pytube/__pycache__/monostate.cpython-39.pyc differ diff --git a/pytube/__pycache__/parser.cpython-310.pyc b/pytube/__pycache__/parser.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1acfc4137454b77b841b6ce8ea08a12e68b4459e Binary files /dev/null and b/pytube/__pycache__/parser.cpython-310.pyc differ diff --git a/pytube/__pycache__/parser.cpython-39.pyc b/pytube/__pycache__/parser.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ae93b7ac4c9f49156f75bdff0775cd739b3fd2aa Binary files /dev/null and b/pytube/__pycache__/parser.cpython-39.pyc differ diff --git a/pytube/__pycache__/query.cpython-310.pyc 
b/pytube/__pycache__/query.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a9c2752c9d6774f5409951cc70c5dc725b7eba8d Binary files /dev/null and b/pytube/__pycache__/query.cpython-310.pyc differ diff --git a/pytube/__pycache__/query.cpython-39.pyc b/pytube/__pycache__/query.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aed2d037c520df540cdabf711befa97e8679b9e7 Binary files /dev/null and b/pytube/__pycache__/query.cpython-39.pyc differ diff --git a/pytube/__pycache__/request.cpython-310.pyc b/pytube/__pycache__/request.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..797d0f1f8c3687e47460dfb4963e2b2e11612bb3 Binary files /dev/null and b/pytube/__pycache__/request.cpython-310.pyc differ diff --git a/pytube/__pycache__/request.cpython-39.pyc b/pytube/__pycache__/request.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..0e7fa35dd40236f71da621d849b1662f09002c1d Binary files /dev/null and b/pytube/__pycache__/request.cpython-39.pyc differ diff --git a/pytube/__pycache__/streams.cpython-310.pyc b/pytube/__pycache__/streams.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..702e55b92d65a1c831167db5c856e85fdf833533 Binary files /dev/null and b/pytube/__pycache__/streams.cpython-310.pyc differ diff --git a/pytube/__pycache__/streams.cpython-39.pyc b/pytube/__pycache__/streams.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5156381cbc356ce1e894313e5d441b83fca5bb4d Binary files /dev/null and b/pytube/__pycache__/streams.cpython-39.pyc differ diff --git a/pytube/__pycache__/version.cpython-310.pyc b/pytube/__pycache__/version.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..e5494b09c76b42d6cdc098bac518b832a0e22bbb Binary files /dev/null and b/pytube/__pycache__/version.cpython-310.pyc differ diff --git a/pytube/__pycache__/version.cpython-39.pyc b/pytube/__pycache__/version.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c4d9190afb08811e4303014b684c53d65a0cba3a Binary files /dev/null and b/pytube/__pycache__/version.cpython-39.pyc differ diff --git a/pytube/captions.py b/pytube/captions.py new file mode 100644 index 0000000000000000000000000000000000000000..ed55f9a2a3083d8d75f8611967a3b49666c66eba --- /dev/null +++ b/pytube/captions.py @@ -0,0 +1,154 @@ +import math +import os +import time +import xml.etree.ElementTree as ElementTree +from html import unescape +from typing import Dict, Optional + +from pytube import request +from pytube.helpers import safe_filename, target_directory + + +class Caption: + """Container for caption tracks.""" + + def __init__(self, caption_track: Dict): + """Construct a :class:`Caption `. + + :param dict caption_track: + Caption track data extracted from ``watch_html``. + """ + self.url = caption_track.get("baseUrl") + + # Certain videos have runs instead of simpleText + # this handles that edge case + name_dict = caption_track['name'] + if 'simpleText' in name_dict: + self.name = name_dict['simpleText'] + else: + for el in name_dict['runs']: + if 'text' in el: + self.name = el['text'] + + # Use "vssId" instead of "languageCode", fix issue #779 + self.code = caption_track["vssId"] + # Remove preceding '.' 
for backwards compatibility, e.g.: + # English -> vssId: .en, languageCode: en + # English (auto-generated) -> vssId: a.en, languageCode: en + self.code = self.code.strip('.') + + @property + def xml_captions(self) -> str: + """Download the xml caption tracks.""" + return request.get(self.url) + + def generate_srt_captions(self) -> str: + """Generate "SubRip Subtitle" captions. + + Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and + recompiles them into the "SubRip Subtitle" format. + """ + return self.xml_caption_to_srt(self.xml_captions) + + @staticmethod + def float_to_srt_time_format(d: float) -> str: + """Convert decimal durations into proper srt format. + + :rtype: str + :returns: + SubRip Subtitle (str) formatted time duration. + + float_to_srt_time_format(3.89) -> '00:00:03,890' + """ + fraction, whole = math.modf(d) + time_fmt = time.strftime("%H:%M:%S,", time.gmtime(whole)) + ms = f"{fraction:.3f}".replace("0.", "") + return time_fmt + ms + + def xml_caption_to_srt(self, xml_captions: str) -> str: + """Convert xml caption tracks to "SubRip Subtitle (srt)". + + :param str xml_captions: + XML formatted caption tracks. + """ + segments = [] + root = ElementTree.fromstring(xml_captions) + for i, child in enumerate(list(root)): + text = child.text or "" + caption = unescape(text.replace("\n", " ").replace(" ", " "),) + try: + duration = float(child.attrib["dur"]) + except KeyError: + duration = 0.0 + start = float(child.attrib["start"]) + end = start + duration + sequence_number = i + 1 # convert from 0-indexed to 1. + line = "{seq}\n{start} --> {end}\n{text}\n".format( + seq=sequence_number, + start=self.float_to_srt_time_format(start), + end=self.float_to_srt_time_format(end), + text=caption, + ) + segments.append(line) + return "\n".join(segments).strip() + + def download( + self, + title: str, + srt: bool = True, + output_path: Optional[str] = None, + filename_prefix: Optional[str] = None, + ) -> str: + """Write the media stream to disk. + + :param title: + Output filename (stem only) for writing media file. + If one is not specified, the default filename is used. + :type title: str + :param srt: + Set to True to download srt, false to download xml. Defaults to True. + :type srt bool + :param output_path: + (optional) Output path for writing media file. If one is not + specified, defaults to the current working directory. + :type output_path: str or None + :param filename_prefix: + (optional) A string that will be prepended to the filename. + For example a number in a playlist or the name of a series. + If one is not specified, nothing will be prepended + This is separate from filename so you can use the default + filename but still add a prefix. 
+ :type filename_prefix: str or None + + :rtype: str + """ + if title.endswith(".srt") or title.endswith(".xml"): + filename = ".".join(title.split(".")[:-1]) + else: + filename = title + + if filename_prefix: + filename = f"{safe_filename(filename_prefix)}{filename}" + + filename = safe_filename(filename) + + filename += f" ({self.code})" + + if srt: + filename += ".srt" + else: + filename += ".xml" + + file_path = os.path.join(target_directory(output_path), filename) + + with open(file_path, "w", encoding="utf-8") as file_handle: + if srt: + file_handle.write(self.generate_srt_captions()) + else: + file_handle.write(self.xml_captions) + + return file_path + + def __repr__(self): + """Printable object representation.""" + return ''.format(s=self) diff --git a/pytube/cipher.py b/pytube/cipher.py new file mode 100644 index 0000000000000000000000000000000000000000..d385d83a491b76ab44e0b464186cd374c7e14e69 --- /dev/null +++ b/pytube/cipher.py @@ -0,0 +1,697 @@ +""" +This module contains all logic necessary to decipher the signature. + +YouTube's strategy to restrict downloading videos is to send a ciphered version +of the signature to the client, along with the decryption algorithm obfuscated +in JavaScript. For the clients to play the videos, JavaScript must take the +ciphered version, cycle it through a series of "transform functions," and then +signs the media URL with the output. + +This module is responsible for (1) finding and extracting those "transform +functions" (2) maps them to Python equivalents and (3) taking the ciphered +signature and decoding it. + +""" +import logging +import re +from itertools import chain +from typing import Any, Callable, Dict, List, Optional, Tuple + +from pytube.exceptions import ExtractError, RegexMatchError +from pytube.helpers import cache, regex_search +from pytube.parser import find_object_from_startpoint, throttling_array_split + +logger = logging.getLogger(__name__) + + +class Cipher: + def __init__(self, js: str): + self.transform_plan: List[str] = get_transform_plan(js) + var_regex = re.compile(r"^\w+\W") + var_match = var_regex.search(self.transform_plan[0]) + if not var_match: + raise RegexMatchError( + caller="__init__", pattern=var_regex.pattern + ) + var = var_match.group(0)[:-1] + self.transform_map = get_transform_map(js, var) + self.js_func_patterns = [ + r"\w+\.(\w+)\(\w,(\d+)\)", + r"\w+\[(\"\w+\")\]\(\w,(\d+)\)" + ] + + self.throttling_plan = get_throttling_plan(js) + self.throttling_array = get_throttling_function_array(js) + + self.calculated_n = None + + def calculate_n(self, initial_n: list): + """Converts n to the correct value to prevent throttling.""" + if self.calculated_n: + return self.calculated_n + + # First, update all instances of 'b' with the list(initial_n) + for i in range(len(self.throttling_array)): + if self.throttling_array[i] == 'b': + self.throttling_array[i] = initial_n + + for step in self.throttling_plan: + curr_func = self.throttling_array[int(step[0])] + if not callable(curr_func): + logger.debug(f'{curr_func} is not callable.') + logger.debug(f'Throttling array:\n{self.throttling_array}\n') + raise ExtractError(f'{curr_func} is not callable.') + + first_arg = self.throttling_array[int(step[1])] + + if len(step) == 2: + curr_func(first_arg) + elif len(step) == 3: + second_arg = self.throttling_array[int(step[2])] + curr_func(first_arg, second_arg) + + self.calculated_n = ''.join(initial_n) + return self.calculated_n + + def get_signature(self, ciphered_signature: str) -> str: + """Decipher the signature. 
+ + Taking the ciphered signature, applies the transform functions. + + :param str ciphered_signature: + The ciphered signature sent in the ``player_config``. + :rtype: str + :returns: + Decrypted signature required to download the media content. + """ + signature = list(ciphered_signature) + + for js_func in self.transform_plan: + name, argument = self.parse_function(js_func) # type: ignore + signature = self.transform_map[name](signature, argument) + logger.debug( + "applied transform function\n" + "output: %s\n" + "js_function: %s\n" + "argument: %d\n" + "function: %s", + "".join(signature), + name, + argument, + self.transform_map[name], + ) + + return "".join(signature) + + @cache + def parse_function(self, js_func: str) -> Tuple[str, int]: + """Parse the Javascript transform function. + + Break a JavaScript transform function down into a two element ``tuple`` + containing the function name and some integer-based argument. + + :param str js_func: + The JavaScript version of the transform function. + :rtype: tuple + :returns: + two element tuple containing the function name and an argument. + + **Example**: + + parse_function('DE.AJ(a,15)') + ('AJ', 15) + + """ + logger.debug("parsing transform function") + for pattern in self.js_func_patterns: + regex = re.compile(pattern) + parse_match = regex.search(js_func) + if parse_match: + fn_name, fn_arg = parse_match.groups() + return fn_name, int(fn_arg) + + raise RegexMatchError( + caller="parse_function", pattern="js_func_patterns" + ) + + +def get_initial_function_name(js: str) -> str: + """Extract the name of the function responsible for computing the signature. + :param str js: + The contents of the base.js asset file. + :rtype: str + :returns: + Function name from regex match + """ + + function_patterns = [ + r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 + r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 + r'(?:\b|[^a-zA-Z0-9$])(?P[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # noqa: E501 + r'(?P[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)', # noqa: E501 + r'(["\'])signature\1\s*,\s*(?P[a-zA-Z0-9$]+)\(', + r"\.sig\|\|(?P[a-zA-Z0-9$]+)\(", + r"yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 + r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 + r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 + r"\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 + r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 + r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P[a-zA-Z0-9$]+)\(", # noqa: E501 + ] + logger.debug("finding initial function name") + for pattern in function_patterns: + regex = re.compile(pattern) + function_match = regex.search(js) + if function_match: + logger.debug("finished regex search, matched: %s", pattern) + return function_match.group(1) + + raise RegexMatchError( + caller="get_initial_function_name", pattern="multiple" + ) + + +def get_transform_plan(js: str) -> List[str]: + """Extract the "transform plan". + + The "transform plan" is the functions that the ciphered signature is + cycled through to obtain the actual signature. + + :param str js: + The contents of the base.js asset file. 
+ + **Example**: + + ['DE.AJ(a,15)', + 'DE.VR(a,3)', + 'DE.AJ(a,51)', + 'DE.VR(a,3)', + 'DE.kT(a,51)', + 'DE.kT(a,8)', + 'DE.VR(a,3)', + 'DE.kT(a,21)'] + """ + name = re.escape(get_initial_function_name(js)) + pattern = r"%s=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}" % name + logger.debug("getting transform plan") + return regex_search(pattern, js, group=1).split(";") + + +def get_transform_object(js: str, var: str) -> List[str]: + """Extract the "transform object". + + The "transform object" contains the function definitions referenced in the + "transform plan". The ``var`` argument is the obfuscated variable name + which contains these functions, for example, given the function call + ``DE.AJ(a,15)`` returned by the transform plan, "DE" would be the var. + + :param str js: + The contents of the base.js asset file. + :param str var: + The obfuscated variable name that stores an object with all functions + that descrambles the signature. + + **Example**: + + >>> get_transform_object(js, 'DE') + ['AJ:function(a){a.reverse()}', + 'VR:function(a,b){a.splice(0,b)}', + 'kT:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}'] + + """ + pattern = r"var %s={(.*?)};" % re.escape(var) + logger.debug("getting transform object") + regex = re.compile(pattern, flags=re.DOTALL) + transform_match = regex.search(js) + if not transform_match: + raise RegexMatchError(caller="get_transform_object", pattern=pattern) + + return transform_match.group(1).replace("\n", " ").split(", ") + + +def get_transform_map(js: str, var: str) -> Dict: + """Build a transform function lookup. + + Build a lookup table of obfuscated JavaScript function names to the + Python equivalents. + + :param str js: + The contents of the base.js asset file. + :param str var: + The obfuscated variable name that stores an object with all functions + that descrambles the signature. + + """ + transform_object = get_transform_object(js, var) + mapper = {} + for obj in transform_object: + # AJ:function(a){a.reverse()} => AJ, function(a){a.reverse()} + name, function = obj.split(":", 1) + fn = map_functions(function) + mapper[name] = fn + return mapper + + +def get_throttling_function_name(js: str) -> str: + """Extract the name of the function that computes the throttling parameter. + + :param str js: + The contents of the base.js asset file. + :rtype: str + :returns: + The name of the function used to compute the throttling parameter. + """ + function_patterns = [ + # https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-865985377 + # https://github.com/yt-dlp/yt-dlp/commit/48416bc4a8f1d5ff07d5977659cb8ece7640dcd8 + # var Bpa = [iha]; + # ... 
+ # a.C && (b = a.get("n")) && (b = Bpa[0](b), a.set("n", b), + # Bpa.length || iha("")) }}; + # In the above case, `iha` is the relevant function name + r'a\.[a-zA-Z]\s*&&\s*\([a-z]\s*=\s*a\.get\("n"\)\)\s*&&\s*' + r'\([a-z]\s*=\s*([a-zA-Z0-9$]+)(\[\d+\])?\([a-z]\)', + ] + logger.debug('Finding throttling function name') + for pattern in function_patterns: + regex = re.compile(pattern) + function_match = regex.search(js) + if function_match: + logger.debug("finished regex search, matched: %s", pattern) + if len(function_match.groups()) == 1: + return function_match.group(1) + idx = function_match.group(2) + if idx: + idx = idx.strip("[]") + array = re.search( + r'var {nfunc}\s*=\s*(\[.+?\]);'.format( + nfunc=re.escape(function_match.group(1))), + js + ) + if array: + array = array.group(1).strip("[]").split(",") + array = [x.strip() for x in array] + return array[int(idx)] + + raise RegexMatchError( + caller="get_throttling_function_name", pattern="multiple" + ) + + +def get_throttling_function_code(js: str) -> str: + """Extract the raw code for the throttling function. + + :param str js: + The contents of the base.js asset file. + :rtype: str + :returns: + The name of the function used to compute the throttling parameter. + """ + # Begin by extracting the correct function name + name = re.escape(get_throttling_function_name(js)) + + # Identify where the function is defined + pattern_start = r"%s=function\(\w\)" % name + regex = re.compile(pattern_start) + match = regex.search(js) + + # Extract the code within curly braces for the function itself, and merge any split lines + code_lines_list = find_object_from_startpoint(js, match.span()[1]).split('\n') + joined_lines = "".join(code_lines_list) + + # Prepend function definition (e.g. `Dea=function(a)`) + return match.group(0) + joined_lines + + +def get_throttling_function_array(js: str) -> List[Any]: + """Extract the "c" array. + + :param str js: + The contents of the base.js asset file. + :returns: + The array of various integers, arrays, and functions. + """ + raw_code = get_throttling_function_code(js) + + array_start = r",c=\[" + array_regex = re.compile(array_start) + match = array_regex.search(raw_code) + + array_raw = find_object_from_startpoint(raw_code, match.span()[1] - 1) + str_array = throttling_array_split(array_raw) + + converted_array = [] + for el in str_array: + try: + converted_array.append(int(el)) + continue + except ValueError: + # Not an integer value. + pass + + if el == 'null': + converted_array.append(None) + continue + + if el.startswith('"') and el.endswith('"'): + # Convert e.g. 
'"abcdef"' to string without quotation marks, 'abcdef' + converted_array.append(el[1:-1]) + continue + + if el.startswith('function'): + mapper = ( + (r"{for\(\w=\(\w%\w\.length\+\w\.length\)%\w\.length;\w--;\)\w\.unshift\(\w.pop\(\)\)}", throttling_unshift), # noqa:E501 + (r"{\w\.reverse\(\)}", throttling_reverse), + (r"{\w\.push\(\w\)}", throttling_push), + (r";var\s\w=\w\[0\];\w\[0\]=\w\[\w\];\w\[\w\]=\w}", throttling_swap), + (r"case\s\d+", throttling_cipher_function), + (r"\w\.splice\(0,1,\w\.splice\(\w,1,\w\[0\]\)\[0\]\)", throttling_nested_splice), # noqa:E501 + (r";\w\.splice\(\w,1\)}", js_splice), + (r"\w\.splice\(-\w\)\.reverse\(\)\.forEach\(function\(\w\){\w\.unshift\(\w\)}\)", throttling_prepend), # noqa:E501 + (r"for\(var \w=\w\.length;\w;\)\w\.push\(\w\.splice\(--\w,1\)\[0\]\)}", throttling_reverse), # noqa:E501 + ) + + found = False + for pattern, fn in mapper: + if re.search(pattern, el): + converted_array.append(fn) + found = True + if found: + continue + + converted_array.append(el) + + # Replace null elements with array itself + for i in range(len(converted_array)): + if converted_array[i] is None: + converted_array[i] = converted_array + + return converted_array + + +def get_throttling_plan(js: str): + """Extract the "throttling plan". + + The "throttling plan" is a list of tuples used for calling functions + in the c array. The first element of the tuple is the index of the + function to call, and any remaining elements of the tuple are arguments + to pass to that function. + + :param str js: + The contents of the base.js asset file. + :returns: + The full function code for computing the throttlign parameter. + """ + raw_code = get_throttling_function_code(js) + + transform_start = r"try{" + plan_regex = re.compile(transform_start) + match = plan_regex.search(raw_code) + + transform_plan_raw = find_object_from_startpoint(raw_code, match.span()[1] - 1) + + # Steps are either c[x](c[y]) or c[x](c[y],c[z]) + step_start = r"c\[(\d+)\]\(c\[(\d+)\](,c(\[(\d+)\]))?\)" + step_regex = re.compile(step_start) + matches = step_regex.findall(transform_plan_raw) + transform_steps = [] + for match in matches: + if match[4] != '': + transform_steps.append((match[0],match[1],match[4])) + else: + transform_steps.append((match[0],match[1])) + + return transform_steps + + +def reverse(arr: List, _: Optional[Any]): + """Reverse elements in a list. + + This function is equivalent to: + + .. code-block:: javascript + + function(a, b) { a.reverse() } + + This method takes an unused ``b`` variable as their transform functions + universally sent two arguments. + + **Example**: + + >>> reverse([1, 2, 3, 4]) + [4, 3, 2, 1] + """ + return arr[::-1] + + +def splice(arr: List, b: int): + """Add/remove items to/from a list. + + This function is equivalent to: + + .. code-block:: javascript + + function(a, b) { a.splice(0, b) } + + **Example**: + + >>> splice([1, 2, 3, 4], 2) + [1, 2] + """ + return arr[b:] + + +def swap(arr: List, b: int): + """Swap positions at b modulus the list length. + + This function is equivalent to: + + .. code-block:: javascript + + function(a, b) { var c=a[0];a[0]=a[b%a.length];a[b]=c } + + **Example**: + + >>> swap([1, 2, 3, 4], 2) + [3, 2, 1, 4] + """ + r = b % len(arr) + return list(chain([arr[r]], arr[1:r], [arr[0]], arr[r + 1 :])) + + +def throttling_reverse(arr: list): + """Reverses the input list. + + Needs to do an in-place reversal so that the passed list gets changed. + To accomplish this, we create a reversed copy, and then change each + indvidual element. 
+ """ + reverse_copy = arr.copy()[::-1] + for i in range(len(reverse_copy)): + arr[i] = reverse_copy[i] + + +def throttling_push(d: list, e: Any): + """Pushes an element onto a list.""" + d.append(e) + + +def throttling_mod_func(d: list, e: int): + """Perform the modular function from the throttling array functions. + + In the javascript, the modular operation is as follows: + e = (e % d.length + d.length) % d.length + + We simply translate this to python here. + """ + return (e % len(d) + len(d)) % len(d) + + +def throttling_unshift(d: list, e: int): + """Rotates the elements of the list to the right. + + In the javascript, the operation is as follows: + for(e=(e%d.length+d.length)%d.length;e--;)d.unshift(d.pop()) + """ + e = throttling_mod_func(d, e) + new_arr = d[-e:] + d[:-e] + d.clear() + for el in new_arr: + d.append(el) + + +def throttling_cipher_function(d: list, e: str): + """This ciphers d with e to generate a new list. + + In the javascript, the operation is as follows: + var h = [A-Za-z0-9-_], f = 96; // simplified from switch-case loop + d.forEach( + function(l,m,n){ + this.push( + n[m]=h[ + (h.indexOf(l)-h.indexOf(this[m])+m-32+f--)%h.length + ] + ) + }, + e.split("") + ) + """ + h = list('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_') + f = 96 + # by naming it "this" we can more closely reflect the js + this = list(e) + + # This is so we don't run into weirdness with enumerate while + # we change the input list + copied_list = d.copy() + + for m, l in enumerate(copied_list): + bracket_val = (h.index(l) - h.index(this[m]) + m - 32 + f) % len(h) + this.append( + h[bracket_val] + ) + d[m] = h[bracket_val] + f -= 1 + + +def throttling_nested_splice(d: list, e: int): + """Nested splice function in throttling js. + + In the javascript, the operation is as follows: + function(d,e){ + e=(e%d.length+d.length)%d.length; + d.splice( + 0, + 1, + d.splice( + e, + 1, + d[0] + )[0] + ) + } + + While testing, all this seemed to do is swap element 0 and e, + but the actual process is preserved in case there was an edge + case that was not considered. + """ + e = throttling_mod_func(d, e) + inner_splice = js_splice( + d, + e, + 1, + d[0] + ) + js_splice( + d, + 0, + 1, + inner_splice[0] + ) + + +def throttling_prepend(d: list, e: int): + """ + + In the javascript, the operation is as follows: + function(d,e){ + e=(e%d.length+d.length)%d.length; + d.splice(-e).reverse().forEach( + function(f){ + d.unshift(f) + } + ) + } + + Effectively, this moves the last e elements of d to the beginning. + """ + start_len = len(d) + # First, calculate e + e = throttling_mod_func(d, e) + + # Then do the prepending + new_arr = d[-e:] + d[:-e] + + # And update the input list + d.clear() + for el in new_arr: + d.append(el) + + end_len = len(d) + assert start_len == end_len + + +def throttling_swap(d: list, e: int): + """Swap positions of the 0'th and e'th elements in-place.""" + e = throttling_mod_func(d, e) + f = d[0] + d[0] = d[e] + d[e] = f + + +def js_splice(arr: list, start: int, delete_count=None, *items): + """Implementation of javascript's splice function. 
+ + :param list arr: + Array to splice + :param int start: + Index at which to start changing the array + :param int delete_count: + Number of elements to delete from the array + :param *items: + Items to add to the array + + Reference: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/splice # noqa:E501 + """ + # Special conditions for start value + try: + if start > len(arr): + start = len(arr) + # If start is negative, count backwards from end + if start < 0: + start = len(arr) - start + except TypeError: + # Non-integer start values are treated as 0 in js + start = 0 + + # Special condition when delete_count is greater than remaining elements + if not delete_count or delete_count >= len(arr) - start: + delete_count = len(arr) - start # noqa: N806 + + deleted_elements = arr[start:start + delete_count] + + # Splice appropriately. + new_arr = arr[:start] + list(items) + arr[start + delete_count:] + + # Replace contents of input array + arr.clear() + for el in new_arr: + arr.append(el) + + return deleted_elements + + +def map_functions(js_func: str) -> Callable: + """For a given JavaScript transform function, return the Python equivalent. + + :param str js_func: + The JavaScript version of the transform function. + """ + mapper = ( + # function(a){a.reverse()} + (r"{\w\.reverse\(\)}", reverse), + # function(a,b){a.splice(0,b)} + (r"{\w\.splice\(0,\w\)}", splice), + # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c} + (r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\]=\w}", swap), + # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c} + ( + r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\%\w.length\]=\w}", + swap, + ), + ) + + for pattern, fn in mapper: + if re.search(pattern, js_func): + return fn + raise RegexMatchError(caller="map_functions", pattern="multiple") diff --git a/pytube/cli.py b/pytube/cli.py new file mode 100755 index 0000000000000000000000000000000000000000..7a9885478a6403d6029430d43fa421c752b1e00f --- /dev/null +++ b/pytube/cli.py @@ -0,0 +1,560 @@ +#!/usr/bin/env python3 +"""A simple command line application to download youtube videos.""" +import argparse +import gzip +import json +import logging +import os +import shutil +import sys +import datetime as dt +import subprocess # nosec +from typing import List, Optional + +import pytube.exceptions as exceptions +from pytube import __version__ +from pytube import CaptionQuery, Playlist, Stream, YouTube +from pytube.helpers import safe_filename, setup_logger + + +logger = logging.getLogger(__name__) + + +def main(): + """Command line application to download youtube videos.""" + # noinspection PyTypeChecker + parser = argparse.ArgumentParser(description=main.__doc__) + args = _parse_args(parser) + if args.verbose: + log_filename = None + if args.logfile: + log_filename = args.logfile + setup_logger(logging.DEBUG, log_filename=log_filename) + logger.debug(f'Pytube version: {__version__}') + + if not args.url or "youtu" not in args.url: + parser.print_help() + sys.exit(1) + + if "/playlist" in args.url: + print("Loading playlist...") + playlist = Playlist(args.url) + if not args.target: + args.target = safe_filename(playlist.title) + for youtube_video in playlist.videos: + try: + _perform_args_on_youtube(youtube_video, args) + except exceptions.PytubeError as e: + print(f"There was an error with video: {youtube_video}") + print(e) + else: + print("Loading video...") + youtube = YouTube(args.url) + _perform_args_on_youtube(youtube, args) + + +def 
+    youtube: YouTube, args: argparse.Namespace
+) -> None:
+    if len(sys.argv) == 2:  # only the url was passed, no other arguments
+        download_highest_resolution_progressive(
+            youtube=youtube, resolution="highest", target=args.target
+        )
+    if args.list_captions:
+        _print_available_captions(youtube.captions)
+    if args.list:
+        display_streams(youtube)
+    if args.build_playback_report:
+        build_playback_report(youtube)
+    if args.itag:
+        download_by_itag(youtube=youtube, itag=args.itag, target=args.target)
+    if args.caption_code:
+        download_caption(
+            youtube=youtube, lang_code=args.caption_code, target=args.target
+        )
+    if args.resolution:
+        download_by_resolution(
+            youtube=youtube, resolution=args.resolution, target=args.target
+        )
+    if args.audio:
+        download_audio(
+            youtube=youtube, filetype=args.audio, target=args.target
+        )
+    if args.ffmpeg:
+        ffmpeg_process(
+            youtube=youtube, resolution=args.ffmpeg, target=args.target
+        )
+
+
+def _parse_args(
+    parser: argparse.ArgumentParser, args: Optional[List] = None
+) -> argparse.Namespace:
+    parser.add_argument(
+        "url", help="The YouTube /watch or /playlist url", nargs="?"
+    )
+    parser.add_argument(
+        "--version", action="version", version="%(prog)s " + __version__,
+    )
+    parser.add_argument(
+        "--itag", type=int, help="The itag for the desired stream",
+    )
+    parser.add_argument(
+        "-r",
+        "--resolution",
+        type=str,
+        help="The resolution for the desired stream",
+    )
+    parser.add_argument(
+        "-l",
+        "--list",
+        action="store_true",
+        help=(
+            "The list option causes pytube cli to return a list of streams "
+            "available to download"
+        ),
+    )
+    parser.add_argument(
+        "-v",
+        "--verbose",
+        action="store_true",
+        dest="verbose",
+        help="Set logger output to verbose output.",
+    )
+    parser.add_argument(
+        "--logfile",
+        action="store",
+        help="Log debug and error messages into a log file",
+    )
+    parser.add_argument(
+        "--build-playback-report",
+        action="store_true",
+        help="Save the html and js to disk",
+    )
+    parser.add_argument(
+        "-c",
+        "--caption-code",
+        type=str,
+        help=(
+            "Download srt captions for given language code. "
+            "Prints available language codes if no argument given"
+        ),
+    )
+    parser.add_argument(
+        '-lc',
+        '--list-captions',
+        action='store_true',
+        help=(
+            "List available caption codes for a video"
+        )
+    )
+    parser.add_argument(
+        "-t",
+        "--target",
+        help=(
+            "The output directory for the downloaded stream. "
+            "Default is current working directory"
+        ),
+    )
+    parser.add_argument(
+        "-a",
+        "--audio",
+        const="mp4",
+        nargs="?",
+        help=(
+            "Download the audio for a given URL at the highest bitrate "
+            "available. Defaults to mp4 format if none is specified"
+        ),
+    )
+    parser.add_argument(
+        "-f",
+        "--ffmpeg",
+        const="best",
+        nargs="?",
+        help=(
+            "Downloads the audio and video stream for the resolution "
+            "provided. If no resolution is provided, downloads the best "
+            "resolution. Runs the command line program ffmpeg to combine "
+            "the audio and video"
+        ),
+    )
+
+    return parser.parse_args(args)
+
+
+def build_playback_report(youtube: YouTube) -> None:
+    """Serialize the request data to json for offline debugging.
+
+    :param YouTube youtube:
+        A YouTube object.
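+
+    Usage sketch (the video id is illustrative; the report is written to the
+    current working directory)::
+
+        yt = YouTube('https://youtu.be/2lAe1cqCOXo')
+        build_playback_report(yt)
+        # -> yt-video-2lAe1cqCOXo-<timestamp>.json.gz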
+ """ + ts = int(dt.datetime.utcnow().timestamp()) + fp = os.path.join(os.getcwd(), f"yt-video-{youtube.video_id}-{ts}.json.gz") + + js = youtube.js + watch_html = youtube.watch_html + vid_info = youtube.vid_info + + with gzip.open(fp, "wb") as fh: + fh.write( + json.dumps( + { + "url": youtube.watch_url, + "js": js, + "watch_html": watch_html, + "video_info": vid_info, + } + ).encode("utf8"), + ) + + +def display_progress_bar( + bytes_received: int, filesize: int, ch: str = "█", scale: float = 0.55 +) -> None: + """Display a simple, pretty progress bar. + + Example: + ~~~~~~~~ + PSY - GANGNAM STYLE(강남스타일) MV.mp4 + ↳ |███████████████████████████████████████| 100.0% + + :param int bytes_received: + The delta between the total file size (bytes) and bytes already + written to disk. + :param int filesize: + File size of the media stream in bytes. + :param str ch: + Character to use for presenting progress segment. + :param float scale: + Scale multiplier to reduce progress bar size. + + """ + columns = shutil.get_terminal_size().columns + max_width = int(columns * scale) + + filled = int(round(max_width * bytes_received / float(filesize))) + remaining = max_width - filled + progress_bar = ch * filled + " " * remaining + percent = round(100.0 * bytes_received / float(filesize), 1) + text = f" ↳ |{progress_bar}| {percent}%\r" + sys.stdout.write(text) + sys.stdout.flush() + + +# noinspection PyUnusedLocal +def on_progress( + stream: Stream, chunk: bytes, bytes_remaining: int +) -> None: # pylint: disable=W0613 + filesize = stream.filesize + bytes_received = filesize - bytes_remaining + display_progress_bar(bytes_received, filesize) + + +def _download( + stream: Stream, + target: Optional[str] = None, + filename: Optional[str] = None, +) -> None: + filesize_megabytes = stream.filesize // 1048576 + print(f"{filename or stream.default_filename} | {filesize_megabytes} MB") + file_path = stream.get_file_path(filename=filename, output_path=target) + if stream.exists_at_path(file_path): + print(f"Already downloaded at:\n{file_path}") + return + + stream.download(output_path=target, filename=filename) + sys.stdout.write("\n") + + +def _unique_name(base: str, subtype: str, media_type: str, target: str) -> str: + """ + Given a base name, the file format, and the target directory, will generate + a filename unique for that directory and file format. + :param str base: + The given base-name. + :param str subtype: + The filetype of the video which will be downloaded. + :param str media_type: + The media_type of the file, ie. "audio" or "video" + :param Path target: + Target directory for download. + """ + counter = 0 + while True: + file_name = f"{base}_{media_type}_{counter}" + file_path = os.path.join(target, f"{file_name}.{subtype}") + if not os.path.exists(file_path): + return file_name + counter += 1 + + +def ffmpeg_process( + youtube: YouTube, resolution: str, target: Optional[str] = None +) -> None: + """ + Decides the correct video stream to download, then calls _ffmpeg_downloader. + + :param YouTube youtube: + A valid YouTube object. + :param str resolution: + YouTube video resolution. 
+ :param str target: + Target directory for download + """ + youtube.register_on_progress_callback(on_progress) + target = target or os.getcwd() + + if resolution == "best": + highest_quality_stream = ( + youtube.streams.filter(progressive=False) + .order_by("resolution") + .last() + ) + mp4_stream = ( + youtube.streams.filter(progressive=False, subtype="mp4") + .order_by("resolution") + .last() + ) + if highest_quality_stream.resolution == mp4_stream.resolution: + video_stream = mp4_stream + else: + video_stream = highest_quality_stream + else: + video_stream = youtube.streams.filter( + progressive=False, resolution=resolution, subtype="mp4" + ).first() + if not video_stream: + video_stream = youtube.streams.filter( + progressive=False, resolution=resolution + ).first() + if video_stream is None: + print(f"Could not find a stream with resolution: {resolution}") + print("Try one of these:") + display_streams(youtube) + sys.exit() + + audio_stream = youtube.streams.get_audio_only(video_stream.subtype) + if not audio_stream: + audio_stream = ( + youtube.streams.filter(only_audio=True).order_by("abr").last() + ) + if not audio_stream: + print("Could not find an audio only stream") + sys.exit() + _ffmpeg_downloader( + audio_stream=audio_stream, video_stream=video_stream, target=target + ) + + +def _ffmpeg_downloader( + audio_stream: Stream, video_stream: Stream, target: str +) -> None: + """ + Given a YouTube Stream object, finds the correct audio stream, downloads them both + giving them a unique name, them uses ffmpeg to create a new file with the audio + and video from the previously downloaded files. Then deletes the original adaptive + streams, leaving the combination. + + :param Stream audio_stream: + A valid Stream object representing the audio to download + :param Stream video_stream: + A valid Stream object representing the video to download + :param Path target: + A valid Path object + """ + video_unique_name = _unique_name( + safe_filename(video_stream.title), + video_stream.subtype, + "video", + target=target, + ) + audio_unique_name = _unique_name( + safe_filename(video_stream.title), + audio_stream.subtype, + "audio", + target=target, + ) + _download(stream=video_stream, target=target, filename=video_unique_name) + print("Loading audio...") + _download(stream=audio_stream, target=target, filename=audio_unique_name) + + video_path = os.path.join( + target, f"{video_unique_name}.{video_stream.subtype}" + ) + audio_path = os.path.join( + target, f"{audio_unique_name}.{audio_stream.subtype}" + ) + final_path = os.path.join( + target, f"{safe_filename(video_stream.title)}.{video_stream.subtype}" + ) + + subprocess.run( # nosec + [ + "ffmpeg", + "-i", + video_path, + "-i", + audio_path, + "-codec", + "copy", + final_path, + ] + ) + os.unlink(video_path) + os.unlink(audio_path) + + +def download_by_itag( + youtube: YouTube, itag: int, target: Optional[str] = None +) -> None: + """Start downloading a YouTube video. + + :param YouTube youtube: + A valid YouTube object. + :param int itag: + YouTube format identifier code. 
+ :param str target: + Target directory for download + """ + stream = youtube.streams.get_by_itag(itag) + if stream is None: + print(f"Could not find a stream with itag: {itag}") + print("Try one of these:") + display_streams(youtube) + sys.exit() + + youtube.register_on_progress_callback(on_progress) + + try: + _download(stream, target=target) + except KeyboardInterrupt: + sys.exit() + + +def download_by_resolution( + youtube: YouTube, resolution: str, target: Optional[str] = None +) -> None: + """Start downloading a YouTube video. + + :param YouTube youtube: + A valid YouTube object. + :param str resolution: + YouTube video resolution. + :param str target: + Target directory for download + """ + # TODO(nficano): allow dash itags to be selected + stream = youtube.streams.get_by_resolution(resolution) + if stream is None: + print(f"Could not find a stream with resolution: {resolution}") + print("Try one of these:") + display_streams(youtube) + sys.exit() + + youtube.register_on_progress_callback(on_progress) + + try: + _download(stream, target=target) + except KeyboardInterrupt: + sys.exit() + + +def download_highest_resolution_progressive( + youtube: YouTube, resolution: str, target: Optional[str] = None +) -> None: + """Start downloading the highest resolution progressive stream. + + :param YouTube youtube: + A valid YouTube object. + :param str resolution: + YouTube video resolution. + :param str target: + Target directory for download + """ + youtube.register_on_progress_callback(on_progress) + try: + stream = youtube.streams.get_highest_resolution() + except exceptions.VideoUnavailable as err: + print(f"No video streams available: {err}") + else: + try: + _download(stream, target=target) + except KeyboardInterrupt: + sys.exit() + + +def display_streams(youtube: YouTube) -> None: + """Probe YouTube video and lists its available formats. + + :param YouTube youtube: + A valid YouTube watch URL. + + """ + for stream in youtube.streams: + print(stream) + + +def _print_available_captions(captions: CaptionQuery) -> None: + print( + f"Available caption codes are: {', '.join(c.code for c in captions)}" + ) + + +def download_caption( + youtube: YouTube, lang_code: Optional[str], target: Optional[str] = None +) -> None: + """Download a caption for the YouTube video. + + :param YouTube youtube: + A valid YouTube object. + :param str lang_code: + Language code desired for caption file. + Prints available codes if the value is None + or the desired code is not available. + :param str target: + Target directory for download + """ + try: + caption = youtube.captions[lang_code] + downloaded_path = caption.download( + title=youtube.title, output_path=target + ) + print(f"Saved caption file to: {downloaded_path}") + except KeyError: + print(f"Unable to find caption with code: {lang_code}") + _print_available_captions(youtube.captions) + + +def download_audio( + youtube: YouTube, filetype: str, target: Optional[str] = None +) -> None: + """ + Given a filetype, downloads the highest quality available audio stream for a + YouTube video. + + :param YouTube youtube: + A valid YouTube object. + :param str filetype: + Desired file format to download. + :param str target: + Target directory for download + """ + audio = ( + youtube.streams.filter(only_audio=True, subtype=filetype) + .order_by("abr") + .last() + ) + + if audio is None: + print("No audio only stream found. 
Try one of these:") + display_streams(youtube) + sys.exit() + + youtube.register_on_progress_callback(on_progress) + + try: + _download(audio, target=target) + except KeyboardInterrupt: + sys.exit() + + +if __name__ == "__main__": + main() diff --git a/pytube/contrib/__init__.py b/pytube/contrib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/pytube/contrib/__pycache__/__init__.cpython-310.pyc b/pytube/contrib/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..147e1b832be9f01eff46776e11b577d91599a3b3 Binary files /dev/null and b/pytube/contrib/__pycache__/__init__.cpython-310.pyc differ diff --git a/pytube/contrib/__pycache__/__init__.cpython-39.pyc b/pytube/contrib/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..febbf2fddad0991e56249082e78517ca95e40b9e Binary files /dev/null and b/pytube/contrib/__pycache__/__init__.cpython-39.pyc differ diff --git a/pytube/contrib/__pycache__/channel.cpython-310.pyc b/pytube/contrib/__pycache__/channel.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f84ea4af1f983133d0d1871e13885205edae5faf Binary files /dev/null and b/pytube/contrib/__pycache__/channel.cpython-310.pyc differ diff --git a/pytube/contrib/__pycache__/channel.cpython-39.pyc b/pytube/contrib/__pycache__/channel.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8dc5ea77242bc1610966c04ccff54311c01b2852 Binary files /dev/null and b/pytube/contrib/__pycache__/channel.cpython-39.pyc differ diff --git a/pytube/contrib/__pycache__/playlist.cpython-310.pyc b/pytube/contrib/__pycache__/playlist.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..234b436b8b52ee2e67b5fbd099889dfbbe0681d9 Binary files /dev/null and b/pytube/contrib/__pycache__/playlist.cpython-310.pyc differ diff --git a/pytube/contrib/__pycache__/playlist.cpython-39.pyc b/pytube/contrib/__pycache__/playlist.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..5d907c7b146a13659690c487bccee9bca1fd1861 Binary files /dev/null and b/pytube/contrib/__pycache__/playlist.cpython-39.pyc differ diff --git a/pytube/contrib/__pycache__/search.cpython-310.pyc b/pytube/contrib/__pycache__/search.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..36021f1e2502eebdb4370d7c9cd6213ee04558d8 Binary files /dev/null and b/pytube/contrib/__pycache__/search.cpython-310.pyc differ diff --git a/pytube/contrib/__pycache__/search.cpython-39.pyc b/pytube/contrib/__pycache__/search.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bc70f8efd2c342ca8587bc4ec55eda85fc32cadd Binary files /dev/null and b/pytube/contrib/__pycache__/search.cpython-39.pyc differ diff --git a/pytube/contrib/channel.py b/pytube/contrib/channel.py new file mode 100644 index 0000000000000000000000000000000000000000..147ff7eaa3c8d013a61ba02817c8400feb311c49 --- /dev/null +++ b/pytube/contrib/channel.py @@ -0,0 +1,201 @@ +# -*- coding: utf-8 -*- +"""Module for interacting with a user's youtube channel.""" +import json +import logging +from typing import Dict, List, Optional, Tuple + +from pytube import extract, Playlist, request +from pytube.helpers import uniqueify + +logger = logging.getLogger(__name__) + + +class Channel(Playlist): + def __init__(self, url: str, proxies: Optional[Dict[str, str]] = 
None): + """Construct a :class:`Channel `. + + :param str url: + A valid YouTube channel URL. + :param proxies: + (Optional) A dictionary of proxies to use for web requests. + """ + super().__init__(url, proxies) + + self.channel_uri = extract.channel_name(url) + + self.channel_url = ( + f"https://www.youtube.com{self.channel_uri}" + ) + + self.videos_url = self.channel_url + '/videos' + self.playlists_url = self.channel_url + '/playlists' + self.community_url = self.channel_url + '/community' + self.featured_channels_url = self.channel_url + '/channels' + self.about_url = self.channel_url + '/about' + + # Possible future additions + self._playlists_html = None + self._community_html = None + self._featured_channels_html = None + self._about_html = None + + @property + def channel_name(self): + """Get the name of the YouTube channel. + + :rtype: str + """ + return self.initial_data['metadata']['channelMetadataRenderer']['title'] + + @property + def channel_id(self): + """Get the ID of the YouTube channel. + + This will return the underlying ID, not the vanity URL. + + :rtype: str + """ + return self.initial_data['metadata']['channelMetadataRenderer']['externalId'] + + @property + def vanity_url(self): + """Get the vanity URL of the YouTube channel. + + Returns None if it doesn't exist. + + :rtype: str + """ + return self.initial_data['metadata']['channelMetadataRenderer'].get('vanityChannelUrl', None) # noqa:E501 + + @property + def html(self): + """Get the html for the /videos page. + + :rtype: str + """ + if self._html: + return self._html + self._html = request.get(self.videos_url) + return self._html + + @property + def playlists_html(self): + """Get the html for the /playlists page. + + Currently unused for any functionality. + + :rtype: str + """ + if self._playlists_html: + return self._playlists_html + else: + self._playlists_html = request.get(self.playlists_url) + return self._playlists_html + + @property + def community_html(self): + """Get the html for the /community page. + + Currently unused for any functionality. + + :rtype: str + """ + if self._community_html: + return self._community_html + else: + self._community_html = request.get(self.community_url) + return self._community_html + + @property + def featured_channels_html(self): + """Get the html for the /channels page. + + Currently unused for any functionality. + + :rtype: str + """ + if self._featured_channels_html: + return self._featured_channels_html + else: + self._featured_channels_html = request.get(self.featured_channels_url) + return self._featured_channels_html + + @property + def about_html(self): + """Get the html for the /about page. + + Currently unused for any functionality. 
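+
+    Usage sketch (illustrative channel url; the page is fetched over the
+    network on first access, then cached)::
+
+        c = Channel('https://www.youtube.com/c/ProgrammingKnowledge')
+        html = c.about_html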
+ + :rtype: str + """ + if self._about_html: + return self._about_html + else: + self._about_html = request.get(self.about_url) + return self._about_html + + @staticmethod + def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]: + """Extracts videos from a raw json page + + :param str raw_json: Input json extracted from the page or the last + server response + :rtype: Tuple[List[str], Optional[str]] + :returns: Tuple containing a list of up to 100 video watch ids and + a continuation token, if more videos are available + """ + initial_data = json.loads(raw_json) + # this is the json tree structure, if the json was extracted from + # html + try: + videos = initial_data["contents"][ + "twoColumnBrowseResultsRenderer"][ + "tabs"][1]["tabRenderer"]["content"][ + "sectionListRenderer"]["contents"][0][ + "itemSectionRenderer"]["contents"][0][ + "gridRenderer"]["items"] + except (KeyError, IndexError, TypeError): + try: + # this is the json tree structure, if the json was directly sent + # by the server in a continuation response + important_content = initial_data[1]['response']['onResponseReceivedActions'][ + 0 + ]['appendContinuationItemsAction']['continuationItems'] + videos = important_content + except (KeyError, IndexError, TypeError): + try: + # this is the json tree structure, if the json was directly sent + # by the server in a continuation response + # no longer a list and no longer has the "response" key + important_content = initial_data['onResponseReceivedActions'][0][ + 'appendContinuationItemsAction']['continuationItems'] + videos = important_content + except (KeyError, IndexError, TypeError) as p: + logger.info(p) + return [], None + + try: + continuation = videos[-1]['continuationItemRenderer'][ + 'continuationEndpoint' + ]['continuationCommand']['token'] + videos = videos[:-1] + except (KeyError, IndexError): + # if there is an error, no continuation is available + continuation = None + + # remove duplicates + return ( + uniqueify( + list( + # only extract the video ids from the video data + map( + lambda x: ( + f"/watch?v=" + f"{x['gridVideoRenderer']['videoId']}" + ), + videos + ) + ), + ), + continuation, + ) diff --git a/pytube/contrib/playlist.py b/pytube/contrib/playlist.py new file mode 100644 index 0000000000000000000000000000000000000000..c55f5e9dc5ec2f7023be75638c951780c46d8d19 --- /dev/null +++ b/pytube/contrib/playlist.py @@ -0,0 +1,419 @@ +"""Module to download a complete playlist from a youtube channel.""" +import json +import logging +from collections.abc import Sequence +from datetime import date, datetime +from typing import Dict, Iterable, List, Optional, Tuple, Union + +from pytube import extract, request, YouTube +from pytube.helpers import cache, DeferredGeneratorList, install_proxy, uniqueify + +logger = logging.getLogger(__name__) + + +class Playlist(Sequence): + """Load a YouTube playlist with URL""" + + def __init__(self, url: str, proxies: Optional[Dict[str, str]] = None): + if proxies: + install_proxy(proxies) + + self._input_url = url + + # These need to be initialized as None for the properties. + self._html = None + self._ytcfg = None + self._initial_data = None + self._sidebar_info = None + + self._playlist_id = None + + @property + def playlist_id(self): + """Get the playlist id. + + :rtype: str + """ + if self._playlist_id: + return self._playlist_id + self._playlist_id = extract.playlist_id(self._input_url) + return self._playlist_id + + @property + def playlist_url(self): + """Get the base playlist url. 
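+
+    Example (illustrative list id; no request is made, the id is parsed
+    locally from the input url)::
+
+        p = Playlist('https://www.youtube.com/watch?v=2lAe1cqCOXo&list=PL6B3937A5D230E335')
+        p.playlist_url
+        # -> 'https://www.youtube.com/playlist?list=PL6B3937A5D230E335'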
+ + :rtype: str + """ + return f"https://www.youtube.com/playlist?list={self.playlist_id}" + + @property + def html(self): + """Get the playlist page html. + + :rtype: str + """ + if self._html: + return self._html + self._html = request.get(self.playlist_url) + return self._html + + @property + def ytcfg(self): + """Extract the ytcfg from the playlist page html. + + :rtype: dict + """ + if self._ytcfg: + return self._ytcfg + self._ytcfg = extract.get_ytcfg(self.html) + return self._ytcfg + + @property + def initial_data(self): + """Extract the initial data from the playlist page html. + + :rtype: dict + """ + if self._initial_data: + return self._initial_data + else: + self._initial_data = extract.initial_data(self.html) + return self._initial_data + + @property + def sidebar_info(self): + """Extract the sidebar info from the playlist page html. + + :rtype: dict + """ + if self._sidebar_info: + return self._sidebar_info + else: + self._sidebar_info = self.initial_data['sidebar'][ + 'playlistSidebarRenderer']['items'] + return self._sidebar_info + + @property + def yt_api_key(self): + """Extract the INNERTUBE_API_KEY from the playlist ytcfg. + + :rtype: str + """ + return self.ytcfg['INNERTUBE_API_KEY'] + + def _paginate( + self, until_watch_id: Optional[str] = None + ) -> Iterable[List[str]]: + """Parse the video links from the page source, yields the /watch?v= + part from video link + + :param until_watch_id Optional[str]: YouTube Video watch id until + which the playlist should be read. + + :rtype: Iterable[List[str]] + :returns: Iterable of lists of YouTube watch ids + """ + videos_urls, continuation = self._extract_videos( + json.dumps(extract.initial_data(self.html)) + ) + if until_watch_id: + try: + trim_index = videos_urls.index(f"/watch?v={until_watch_id}") + yield videos_urls[:trim_index] + return + except ValueError: + pass + yield videos_urls + + # Extraction from a playlist only returns 100 videos at a time + # if self._extract_videos returns a continuation there are more + # than 100 songs inside a playlist, so we need to add further requests + # to gather all of them + if continuation: + load_more_url, headers, data = self._build_continuation_url(continuation) + else: + load_more_url, headers, data = None, None, None + + while load_more_url and headers and data: # there is an url found + logger.debug("load more url: %s", load_more_url) + # requesting the next page of videos with the url generated from the + # previous page, needs to be a post + req = request.post(load_more_url, extra_headers=headers, data=data) + # extract up to 100 songs from the page loaded + # returns another continuation if more videos are available + videos_urls, continuation = self._extract_videos(req) + if until_watch_id: + try: + trim_index = videos_urls.index(f"/watch?v={until_watch_id}") + yield videos_urls[:trim_index] + return + except ValueError: + pass + yield videos_urls + + if continuation: + load_more_url, headers, data = self._build_continuation_url( + continuation + ) + else: + load_more_url, headers, data = None, None, None + + def _build_continuation_url(self, continuation: str) -> Tuple[str, dict, dict]: + """Helper method to build the url and headers required to request + the next page of videos + + :param str continuation: Continuation extracted from the json response + of the last page + :rtype: Tuple[str, dict, dict] + :returns: Tuple of an url and required headers for the next http + request + """ + return ( + ( + # was changed to this format (and post requests) + # between 
2021.03.02 and 2021.03.03 + "https://www.youtube.com/youtubei/v1/browse?key=" + f"{self.yt_api_key}" + ), + { + "X-YouTube-Client-Name": "1", + "X-YouTube-Client-Version": "2.20200720.00.02", + }, + # extra data required for post request + { + "continuation": continuation, + "context": { + "client": { + "clientName": "WEB", + "clientVersion": "2.20200720.00.02" + } + } + } + ) + + @staticmethod + def _extract_videos(raw_json: str) -> Tuple[List[str], Optional[str]]: + """Extracts videos from a raw json page + + :param str raw_json: Input json extracted from the page or the last + server response + :rtype: Tuple[List[str], Optional[str]] + :returns: Tuple containing a list of up to 100 video watch ids and + a continuation token, if more videos are available + """ + initial_data = json.loads(raw_json) + try: + # this is the json tree structure, if the json was extracted from + # html + section_contents = initial_data["contents"][ + "twoColumnBrowseResultsRenderer"][ + "tabs"][0]["tabRenderer"]["content"][ + "sectionListRenderer"]["contents"] + try: + # Playlist without submenus + important_content = section_contents[ + 0]["itemSectionRenderer"][ + "contents"][0]["playlistVideoListRenderer"] + except (KeyError, IndexError, TypeError): + # Playlist with submenus + important_content = section_contents[ + 1]["itemSectionRenderer"][ + "contents"][0]["playlistVideoListRenderer"] + videos = important_content["contents"] + except (KeyError, IndexError, TypeError): + try: + # this is the json tree structure, if the json was directly sent + # by the server in a continuation response + # no longer a list and no longer has the "response" key + important_content = initial_data['onResponseReceivedActions'][0][ + 'appendContinuationItemsAction']['continuationItems'] + videos = important_content + except (KeyError, IndexError, TypeError) as p: + logger.info(p) + return [], None + + try: + continuation = videos[-1]['continuationItemRenderer'][ + 'continuationEndpoint' + ]['continuationCommand']['token'] + videos = videos[:-1] + except (KeyError, IndexError): + # if there is an error, no continuation is available + continuation = None + + # remove duplicates + return ( + uniqueify( + list( + # only extract the video ids from the video data + map( + lambda x: ( + f"/watch?v=" + f"{x['playlistVideoRenderer']['videoId']}" + ), + videos + ) + ), + ), + continuation, + ) + + def trimmed(self, video_id: str) -> Iterable[str]: + """Retrieve a list of YouTube video URLs trimmed at the given video ID + + i.e. if the playlist has video IDs 1,2,3,4 calling trimmed(3) returns + [1,2] + :type video_id: str + video ID to trim the returned list of playlist URLs at + :rtype: List[str] + :returns: + List of video URLs from the playlist trimmed at the given ID + """ + for page in self._paginate(until_watch_id=video_id): + yield from (self._video_url(watch_path) for watch_path in page) + + def url_generator(self): + """Generator that yields video URLs. 
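+
+    Usage sketch (illustrative; pages of up to 100 results are fetched
+    lazily as the generator is consumed)::
+
+        for url in playlist.url_generator():
+            print(url)  # https://www.youtube.com/watch?v=...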
+ + :Yields: Video URLs + """ + for page in self._paginate(): + for video in page: + yield self._video_url(video) + + @property # type: ignore + @cache + def video_urls(self) -> DeferredGeneratorList: + """Complete links of all the videos in playlist + + :rtype: List[str] + :returns: List of video URLs + """ + return DeferredGeneratorList(self.url_generator()) + + def videos_generator(self): + for url in self.video_urls: + yield YouTube(url) + + @property + def videos(self) -> Iterable[YouTube]: + """Yields YouTube objects of videos in this playlist + + :rtype: List[YouTube] + :returns: List of YouTube + """ + return DeferredGeneratorList(self.videos_generator()) + + def __getitem__(self, i: Union[slice, int]) -> Union[str, List[str]]: + return self.video_urls[i] + + def __len__(self) -> int: + return len(self.video_urls) + + def __repr__(self) -> str: + return f"{repr(self.video_urls)}" + + @property + @cache + def last_updated(self) -> Optional[date]: + """Extract the date that the playlist was last updated. + + For some playlists, this will be a specific date, which is returned as a datetime + object. For other playlists, this is an estimate such as "1 week ago". Due to the + fact that this value is returned as a string, pytube does a best-effort parsing + where possible, and returns the raw string where it is not possible. + + :return: Date of last playlist update where possible, else the string provided + :rtype: datetime.date + """ + last_updated_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][ + 'stats'][2]['runs'][1]['text'] + try: + date_components = last_updated_text.split() + month = date_components[0] + day = date_components[1].strip(',') + year = date_components[2] + return datetime.strptime( + f"{month} {day:0>2} {year}", "%b %d %Y" + ).date() + except (IndexError, KeyError): + return last_updated_text + + @property + @cache + def title(self) -> Optional[str]: + """Extract playlist title + + :return: playlist title (name) + :rtype: Optional[str] + """ + return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][ + 'title']['runs'][0]['text'] + + @property + def description(self) -> str: + return self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][ + 'description']['simpleText'] + + @property + def length(self): + """Extract the number of videos in the playlist. + + :return: Playlist video count + :rtype: int + """ + count_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][ + 'stats'][0]['runs'][0]['text'] + count_text = count_text.replace(',','') + return int(count_text) + + @property + def views(self): + """Extract view count for playlist. + + :return: Playlist view count + :rtype: int + """ + # "1,234,567 views" + views_text = self.sidebar_info[0]['playlistSidebarPrimaryInfoRenderer'][ + 'stats'][1]['simpleText'] + # "1,234,567" + count_text = views_text.split()[0] + # "1234567" + count_text = count_text.replace(',', '') + return int(count_text) + + @property + def owner(self): + """Extract the owner of the playlist. + + :return: Playlist owner name. + :rtype: str + """ + return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][ + 'videoOwner']['videoOwnerRenderer']['title']['runs'][0]['text'] + + @property + def owner_id(self): + """Extract the channel_id of the owner of the playlist. + + :return: Playlist owner's channel ID. 
+ :rtype: str + """ + return self.sidebar_info[1]['playlistSidebarSecondaryInfoRenderer'][ + 'videoOwner']['videoOwnerRenderer']['title']['runs'][0][ + 'navigationEndpoint']['browseEndpoint']['browseId'] + + @property + def owner_url(self): + """Create the channel url of the owner of the playlist. + + :return: Playlist owner's channel url. + :rtype: str + """ + return f'https://www.youtube.com/channel/{self.owner_id}' + + @staticmethod + def _video_url(watch_path: str): + return f"https://www.youtube.com{watch_path}" diff --git a/pytube/contrib/search.py b/pytube/contrib/search.py new file mode 100644 index 0000000000000000000000000000000000000000..96982d80ed6291b9a526fd7329b802e8360ef708 --- /dev/null +++ b/pytube/contrib/search.py @@ -0,0 +1,225 @@ +"""Module for interacting with YouTube search.""" +# Native python imports +import logging + +# Local imports +from pytube import YouTube +from pytube.innertube import InnerTube + + +logger = logging.getLogger(__name__) + + +class Search: + def __init__(self, query): + """Initialize Search object. + + :param str query: + Search query provided by the user. + """ + self.query = query + self._innertube_client = InnerTube(client='WEB') + + # The first search, without a continuation, is structured differently + # and contains completion suggestions, so we must store this separately + self._initial_results = None + + self._results = None + self._completion_suggestions = None + + # Used for keeping track of query continuations so that new results + # are always returned when get_next_results() is called + self._current_continuation = None + + @property + def completion_suggestions(self): + """Return query autocompletion suggestions for the query. + + :rtype: list + :returns: + A list of autocomplete suggestions provided by YouTube for the query. + """ + if self._completion_suggestions: + return self._completion_suggestions + if self.results: + self._completion_suggestions = self._initial_results['refinements'] + return self._completion_suggestions + + @property + def results(self): + """Return search results. + + On first call, will generate and return the first set of results. + Additional results can be generated using ``.get_next_results()``. + + :rtype: list + :returns: + A list of YouTube objects. + """ + if self._results: + return self._results + + videos, continuation = self.fetch_and_parse() + self._results = videos + self._current_continuation = continuation + return self._results + + def get_next_results(self): + """Use the stored continuation string to fetch the next set of results. + + This method does not return the results, but instead updates the results property. + """ + if self._current_continuation: + videos, continuation = self.fetch_and_parse(self._current_continuation) + self._results.extend(videos) + self._current_continuation = continuation + else: + raise IndexError + + def fetch_and_parse(self, continuation=None): + """Fetch from the innertube API and parse the results. + + :param str continuation: + Continuation string for fetching results. + :rtype: tuple + :returns: + A tuple of a list of YouTube objects and a continuation string. 
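+
+    Usage sketch (illustrative)::
+
+        s = Search('python tutorial')
+        videos, token = s.fetch_and_parse()
+        if token:
+            more_videos, token = s.fetch_and_parse(token)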
+ """ + # Begin by executing the query and identifying the relevant sections + # of the results + raw_results = self.fetch_query(continuation) + + # Initial result is handled by try block, continuations by except block + try: + sections = raw_results['contents']['twoColumnSearchResultsRenderer'][ + 'primaryContents']['sectionListRenderer']['contents'] + except KeyError: + sections = raw_results['onResponseReceivedCommands'][0][ + 'appendContinuationItemsAction']['continuationItems'] + item_renderer = None + continuation_renderer = None + for s in sections: + if 'itemSectionRenderer' in s: + item_renderer = s['itemSectionRenderer'] + if 'continuationItemRenderer' in s: + continuation_renderer = s['continuationItemRenderer'] + + # If the continuationItemRenderer doesn't exist, assume no further results + if continuation_renderer: + next_continuation = continuation_renderer['continuationEndpoint'][ + 'continuationCommand']['token'] + else: + next_continuation = None + + # If the itemSectionRenderer doesn't exist, assume no results. + if item_renderer: + videos = [] + raw_video_list = item_renderer['contents'] + for video_details in raw_video_list: + # Skip over ads + if video_details.get('searchPyvRenderer', {}).get('ads', None): + continue + + # Skip "recommended" type videos e.g. "people also watched" and "popular X" + # that break up the search results + if 'shelfRenderer' in video_details: + continue + + # Skip auto-generated "mix" playlist results + if 'radioRenderer' in video_details: + continue + + # Skip playlist results + if 'playlistRenderer' in video_details: + continue + + # Skip channel results + if 'channelRenderer' in video_details: + continue + + # Skip 'people also searched for' results + if 'horizontalCardListRenderer' in video_details: + continue + + # Can't seem to reproduce, probably related to typo fix suggestions + if 'didYouMeanRenderer' in video_details: + continue + + # Seems to be the renderer used for the image shown on a no results page + if 'backgroundPromoRenderer' in video_details: + continue + + if 'videoRenderer' not in video_details: + logger.warn('Unexpected renderer encountered.') + logger.warn(f'Renderer name: {video_details.keys()}') + logger.warn(f'Search term: {self.query}') + logger.warn( + 'Please open an issue at ' + 'https://github.com/pytube/pytube/issues ' + 'and provide this log output.' + ) + continue + + # Extract relevant video information from the details. + # Some of this can be used to pre-populate attributes of the + # YouTube object. 
+ vid_renderer = video_details['videoRenderer'] + vid_id = vid_renderer['videoId'] + vid_url = f'https://www.youtube.com/watch?v={vid_id}' + vid_title = vid_renderer['title']['runs'][0]['text'] + vid_channel_name = vid_renderer['ownerText']['runs'][0]['text'] + vid_channel_uri = vid_renderer['ownerText']['runs'][0][ + 'navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] + # Livestreams have "runs", non-livestreams have "simpleText", + # and scheduled releases do not have 'viewCountText' + if 'viewCountText' in vid_renderer: + if 'runs' in vid_renderer['viewCountText']: + vid_view_count_text = vid_renderer['viewCountText']['runs'][0]['text'] + else: + vid_view_count_text = vid_renderer['viewCountText']['simpleText'] + # Strip ' views' text, then remove commas + stripped_text = vid_view_count_text.split()[0].replace(',','') + if stripped_text == 'No': + vid_view_count = 0 + else: + vid_view_count = int(stripped_text) + else: + vid_view_count = 0 + if 'lengthText' in vid_renderer: + vid_length = vid_renderer['lengthText']['simpleText'] + else: + vid_length = None + + vid_metadata = { + 'id': vid_id, + 'url': vid_url, + 'title': vid_title, + 'channel_name': vid_channel_name, + 'channel_url': vid_channel_uri, + 'view_count': vid_view_count, + 'length': vid_length + } + + # Construct YouTube object from metadata and append to results + vid = YouTube(vid_metadata['url']) + vid.author = vid_metadata['channel_name'] + vid.title = vid_metadata['title'] + videos.append(vid) + else: + videos = None + + return videos, next_continuation + + def fetch_query(self, continuation=None): + """Fetch raw results from the innertube API. + + :param str continuation: + Continuation string for fetching results. + :rtype: dict + :returns: + The raw json object returned by the innertube API. + """ + query_results = self._innertube_client.search(self.query, continuation) + if not self._initial_results: + self._initial_results = query_results + return query_results # noqa:R504 diff --git a/pytube/exceptions.py b/pytube/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..ec44d2a12f7f82cbafbbfd717efae7f0644b7f33 --- /dev/null +++ b/pytube/exceptions.py @@ -0,0 +1,145 @@ +"""Library specific exception definitions.""" +from typing import Pattern, Union + + +class PytubeError(Exception): + """Base pytube exception that all others inherit. + + This is done to not pollute the built-in exceptions, which *could* result + in unintended errors being unexpectedly and incorrectly handled within + implementers code. + """ + + +class MaxRetriesExceeded(PytubeError): + """Maximum number of retries exceeded.""" + + +class HTMLParseError(PytubeError): + """HTML could not be parsed""" + + +class ExtractError(PytubeError): + """Data extraction based exception.""" + + +class RegexMatchError(ExtractError): + """Regex pattern did not return any matches.""" + + def __init__(self, caller: str, pattern: Union[str, Pattern]): + """ + :param str caller: + Calling function + :param str pattern: + Pattern that failed to match + """ + super().__init__(f"{caller}: could not find match for {pattern}") + self.caller = caller + self.pattern = pattern + + +class VideoUnavailable(PytubeError): + """Base video unavailable error.""" + def __init__(self, video_id: str): + """ + :param str video_id: + A YouTube video identifier. 
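+
+        Usage sketch (illustrative id)::
+
+            try:
+                raise VideoUnavailable('2lAe1cqCOXo')
+            except VideoUnavailable as e:
+                print(e)  # -> 2lAe1cqCOXo is unavailable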
+ """ + self.video_id = video_id + super().__init__(self.error_string) + + @property + def error_string(self): + return f'{self.video_id} is unavailable' + + +class AgeRestrictedError(VideoUnavailable): + """Video is age restricted, and cannot be accessed without OAuth.""" + def __init__(self, video_id: str): + """ + :param str video_id: + A YouTube video identifier. + """ + self.video_id = video_id + super().__init__(self.video_id) + + @property + def error_string(self): + return f"{self.video_id} is age restricted, and can't be accessed without logging in." + + +class LiveStreamError(VideoUnavailable): + """Video is a live stream.""" + def __init__(self, video_id: str): + """ + :param str video_id: + A YouTube video identifier. + """ + self.video_id = video_id + super().__init__(self.video_id) + + @property + def error_string(self): + return f'{self.video_id} is streaming live and cannot be loaded' + + +class VideoPrivate(VideoUnavailable): + def __init__(self, video_id: str): + """ + :param str video_id: + A YouTube video identifier. + """ + self.video_id = video_id + super().__init__(self.video_id) + + @property + def error_string(self): + return f'{self.video_id} is a private video' + + +class RecordingUnavailable(VideoUnavailable): + def __init__(self, video_id: str): + """ + :param str video_id: + A YouTube video identifier. + """ + self.video_id = video_id + super().__init__(self.video_id) + + @property + def error_string(self): + return f'{self.video_id} does not have a live stream recording available' + + +class MembersOnly(VideoUnavailable): + """Video is members-only. + + YouTube has special videos that are only viewable to users who have + subscribed to a content creator. + ref: https://support.google.com/youtube/answer/7544492?hl=en + """ + def __init__(self, video_id: str): + """ + :param str video_id: + A YouTube video identifier. + """ + self.video_id = video_id + super().__init__(self.video_id) + + @property + def error_string(self): + return f'{self.video_id} is a members-only video' + + +class VideoRegionBlocked(VideoUnavailable): + def __init__(self, video_id: str): + """ + :param str video_id: + A YouTube video identifier. + """ + self.video_id = video_id + super().__init__(self.video_id) + + @property + def error_string(self): + return f'{self.video_id} is not available in your region' diff --git a/pytube/extract.py b/pytube/extract.py new file mode 100644 index 0000000000000000000000000000000000000000..d08321408694869020527423cd4d2812f43be58b --- /dev/null +++ b/pytube/extract.py @@ -0,0 +1,579 @@ +"""This module contains all non-cipher related data extraction logic.""" +import logging +import urllib.parse +import re +from collections import OrderedDict +from datetime import datetime +from typing import Any, Dict, List, Optional, Tuple +from urllib.parse import parse_qs, quote, urlencode, urlparse + +from pytube.cipher import Cipher +from pytube.exceptions import HTMLParseError, LiveStreamError, RegexMatchError +from pytube.helpers import regex_search +from pytube.metadata import YouTubeMetadata +from pytube.parser import parse_for_object, parse_for_all_objects + + +logger = logging.getLogger(__name__) + + +def publish_date(watch_html: str): + """Extract publish date + :param str watch_html: + The html contents of the watch page. + :rtype: str + :returns: + Publish date of the video. 
+ """ + try: + result = regex_search( + r"(?<=itemprop=\"datePublished\" content=\")\d{4}-\d{2}-\d{2}", + watch_html, group=0 + ) + except RegexMatchError: + return None + return datetime.strptime(result, '%Y-%m-%d') + + +def recording_available(watch_html): + """Check if live stream recording is available. + + :param str watch_html: + The html contents of the watch page. + :rtype: bool + :returns: + Whether or not the content is private. + """ + unavailable_strings = [ + 'This live stream recording is not available.' + ] + for string in unavailable_strings: + if string in watch_html: + return False + return True + + +def is_private(watch_html): + """Check if content is private. + + :param str watch_html: + The html contents of the watch page. + :rtype: bool + :returns: + Whether or not the content is private. + """ + private_strings = [ + "This is a private video. Please sign in to verify that you may see it.", + "\"simpleText\":\"Private video\"", + "This video is private." + ] + for string in private_strings: + if string in watch_html: + return True + return False + + +def is_age_restricted(watch_html: str) -> bool: + """Check if content is age restricted. + + :param str watch_html: + The html contents of the watch page. + :rtype: bool + :returns: + Whether or not the content is age restricted. + """ + try: + regex_search(r"og:restrictions:age", watch_html, group=0) + except RegexMatchError: + return False + return True + + +def playability_status(watch_html: str) -> (str, str): + """Return the playability status and status explanation of a video. + + For example, a video may have a status of LOGIN_REQUIRED, and an explanation + of "This is a private video. Please sign in to verify that you may see it." + + This explanation is what gets incorporated into the media player overlay. + + :param str watch_html: + The html contents of the watch page. + :rtype: bool + :returns: + Playability status and reason of the video. + """ + player_response = initial_player_response(watch_html) + status_dict = player_response.get('playabilityStatus', {}) + if 'liveStreamability' in status_dict: + return 'LIVE_STREAM', 'Video is a live stream.' + if 'status' in status_dict: + if 'reason' in status_dict: + return status_dict['status'], [status_dict['reason']] + if 'messages' in status_dict: + return status_dict['status'], status_dict['messages'] + return None, [None] + + +def video_id(url: str) -> str: + """Extract the ``video_id`` from a YouTube url. + + This function supports the following patterns: + + - :samp:`https://youtube.com/watch?v={video_id}` + - :samp:`https://youtube.com/embed/{video_id}` + - :samp:`https://youtu.be/{video_id}` + + :param str url: + A YouTube url containing a video id. + :rtype: str + :returns: + YouTube video id. + """ + return regex_search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url, group=1) + + +def playlist_id(url: str) -> str: + """Extract the ``playlist_id`` from a YouTube url. + + This function supports the following patterns: + + - :samp:`https://youtube.com/playlist?list={playlist_id}` + - :samp:`https://youtube.com/watch?v={video_id}&list={playlist_id}` + + :param str url: + A YouTube url containing a playlist id. + :rtype: str + :returns: + YouTube playlist id. + """ + parsed = urllib.parse.urlparse(url) + return parse_qs(parsed.query)['list'][0] + + +def channel_name(url: str) -> str: + """Extract the ``channel_name`` or ``channel_id`` from a YouTube url. 
+ + This function supports the following patterns: + + - :samp:`https://youtube.com/c/{channel_name}/*` + - :samp:`https://youtube.com/channel/{channel_id}/* + - :samp:`https://youtube.com/u/{channel_name}/*` + - :samp:`https://youtube.com/user/{channel_id}/* + + :param str url: + A YouTube url containing a channel name. + :rtype: str + :returns: + YouTube channel name. + """ + patterns = [ + r"(?:\/(c)\/([%\d\w_\-]+)(\/.*)?)", + r"(?:\/(channel)\/([%\w\d_\-]+)(\/.*)?)", + r"(?:\/(u)\/([%\d\w_\-]+)(\/.*)?)", + r"(?:\/(user)\/([%\w\d_\-]+)(\/.*)?)" + ] + for pattern in patterns: + regex = re.compile(pattern) + function_match = regex.search(url) + if function_match: + logger.debug("finished regex search, matched: %s", pattern) + uri_style = function_match.group(1) + uri_identifier = function_match.group(2) + return f'/{uri_style}/{uri_identifier}' + + raise RegexMatchError( + caller="channel_name", pattern="patterns" + ) + + +def video_info_url(video_id: str, watch_url: str) -> str: + """Construct the video_info url. + + :param str video_id: + A YouTube video identifier. + :param str watch_url: + A YouTube watch url. + :rtype: str + :returns: + :samp:`https://youtube.com/get_video_info` with necessary GET + parameters. + """ + params = OrderedDict( + [ + ("video_id", video_id), + ("ps", "default"), + ("eurl", quote(watch_url)), + ("hl", "en_US"), + ("html5", "1"), + ("c", "TVHTML5"), + ("cver", "7.20201028"), + ] + ) + return _video_info_url(params) + + +def video_info_url_age_restricted(video_id: str, embed_html: str) -> str: + """Construct the video_info url. + + :param str video_id: + A YouTube video identifier. + :param str embed_html: + The html contents of the embed page (for age restricted videos). + :rtype: str + :returns: + :samp:`https://youtube.com/get_video_info` with necessary GET + parameters. + """ + try: + sts = regex_search(r'"sts"\s*:\s*(\d+)', embed_html, group=1) + except RegexMatchError: + sts = "" + # Here we use ``OrderedDict`` so that the output is consistent between + # Python 2.7+. + eurl = f"https://youtube.googleapis.com/v/{video_id}" + params = OrderedDict( + [ + ("video_id", video_id), + ("eurl", eurl), + ("sts", sts), + ("html5", "1"), + ("c", "TVHTML5"), + ("cver", "7.20201028"), + ] + ) + return _video_info_url(params) + + +def _video_info_url(params: OrderedDict) -> str: + return "https://www.youtube.com/get_video_info?" + urlencode(params) + + +def js_url(html: str) -> str: + """Get the base JavaScript url. + + Construct the base JavaScript url, which contains the decipher + "transforms". + + :param str html: + The html contents of the watch page. + """ + try: + base_js = get_ytplayer_config(html)['assets']['js'] + except (KeyError, RegexMatchError): + base_js = get_ytplayer_js(html) + return "https://youtube.com" + base_js + + +def mime_type_codec(mime_type_codec: str) -> Tuple[str, List[str]]: + """Parse the type data. + + Breaks up the data in the ``type`` key of the manifest, which contains the + mime type and codecs serialized together, and splits them into separate + elements. + + **Example**: + + mime_type_codec('audio/webm; codecs="opus"') -> ('audio/webm', ['opus']) + + :param str mime_type_codec: + String containing mime type and codecs. + :rtype: tuple + :returns: + The mime type and a list of codecs. 
+ + """ + pattern = r"(\w+\/\w+)\;\scodecs=\"([a-zA-Z-0-9.,\s]*)\"" + regex = re.compile(pattern) + results = regex.search(mime_type_codec) + if not results: + raise RegexMatchError(caller="mime_type_codec", pattern=pattern) + mime_type, codecs = results.groups() + return mime_type, [c.strip() for c in codecs.split(",")] + + +def get_ytplayer_js(html: str) -> Any: + """Get the YouTube player base JavaScript path. + + :param str html + The html contents of the watch page. + :rtype: str + :returns: + Path to YouTube's base.js file. + """ + js_url_patterns = [ + r"(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)" + ] + for pattern in js_url_patterns: + regex = re.compile(pattern) + function_match = regex.search(html) + if function_match: + logger.debug("finished regex search, matched: %s", pattern) + yt_player_js = function_match.group(1) + return yt_player_js + + raise RegexMatchError( + caller="get_ytplayer_js", pattern="js_url_patterns" + ) + + +def get_ytplayer_config(html: str) -> Any: + """Get the YouTube player configuration data from the watch html. + + Extract the ``ytplayer_config``, which is json data embedded within the + watch html and serves as the primary source of obtaining the stream + manifest data. + + :param str html: + The html contents of the watch page. + :rtype: str + :returns: + Substring of the html containing the encoded manifest data. + """ + logger.debug("finding initial function name") + config_patterns = [ + r"ytplayer\.config\s*=\s*", + r"ytInitialPlayerResponse\s*=\s*" + ] + for pattern in config_patterns: + # Try each pattern consecutively if they don't find a match + try: + return parse_for_object(html, pattern) + except HTMLParseError as e: + logger.debug(f'Pattern failed: {pattern}') + logger.debug(e) + continue + + # setConfig() needs to be handled a little differently. + # We want to parse the entire argument to setConfig() + # and use then load that as json to find PLAYER_CONFIG + # inside of it. + setconfig_patterns = [ + r"yt\.setConfig\(.*['\"]PLAYER_CONFIG['\"]:\s*" + ] + for pattern in setconfig_patterns: + # Try each pattern consecutively if they don't find a match + try: + return parse_for_object(html, pattern) + except HTMLParseError: + continue + + raise RegexMatchError( + caller="get_ytplayer_config", pattern="config_patterns, setconfig_patterns" + ) + + +def get_ytcfg(html: str) -> str: + """Get the entirety of the ytcfg object. + + This is built over multiple pieces, so we have to find all matches and + combine the dicts together. + + :param str html: + The html contents of the watch page. + :rtype: str + :returns: + Substring of the html containing the encoded manifest data. + """ + ytcfg = {} + ytcfg_patterns = [ + r"ytcfg\s=\s", + r"ytcfg\.set\(" + ] + for pattern in ytcfg_patterns: + # Try each pattern consecutively and try to build a cohesive object + try: + found_objects = parse_for_all_objects(html, pattern) + for obj in found_objects: + ytcfg.update(obj) + except HTMLParseError: + continue + + if len(ytcfg) > 0: + return ytcfg + + raise RegexMatchError( + caller="get_ytcfg", pattern="ytcfg_pattenrs" + ) + + +def apply_signature(stream_manifest: Dict, vid_info: Dict, js: str) -> None: + """Apply the decrypted signature to the stream manifest. + + :param dict stream_manifest: + Details of the media streams available. + :param str js: + The contents of the base.js asset file. 
+ + """ + cipher = Cipher(js=js) + + for i, stream in enumerate(stream_manifest): + try: + url: str = stream["url"] + except KeyError: + live_stream = ( + vid_info.get("playabilityStatus", {},) + .get("liveStreamability") + ) + if live_stream: + raise LiveStreamError("UNKNOWN") + # 403 Forbidden fix. + if "signature" in url or ( + "s" not in stream and ("&sig=" in url or "&lsig=" in url) + ): + # For certain videos, YouTube will just provide them pre-signed, in + # which case there's no real magic to download them and we can skip + # the whole signature descrambling entirely. + logger.debug("signature found, skip decipher") + continue + + signature = cipher.get_signature(ciphered_signature=stream["s"]) + + logger.debug( + "finished descrambling signature for itag=%s", stream["itag"] + ) + parsed_url = urlparse(url) + + # Convert query params off url to dict + query_params = parse_qs(urlparse(url).query) + query_params = { + k: v[0] for k,v in query_params.items() + } + query_params['sig'] = signature + if 'ratebypass' not in query_params.keys(): + # Cipher n to get the updated value + + initial_n = list(query_params['n']) + new_n = cipher.calculate_n(initial_n) + query_params['n'] = new_n + + url = f'{parsed_url.scheme}://{parsed_url.netloc}{parsed_url.path}?{urlencode(query_params)}' # noqa:E501 + + # 403 forbidden fix + stream_manifest[i]["url"] = url + + +def apply_descrambler(stream_data: Dict) -> None: + """Apply various in-place transforms to YouTube's media stream data. + + Creates a ``list`` of dictionaries by string splitting on commas, then + taking each list item, parsing it as a query string, converting it to a + ``dict`` and unquoting the value. + + :param dict stream_data: + Dictionary containing query string encoded values. + + **Example**: + + >>> d = {'foo': 'bar=1&var=test,em=5&t=url%20encoded'} + >>> apply_descrambler(d, 'foo') + >>> print(d) + {'foo': [{'bar': '1', 'var': 'test'}, {'em': '5', 't': 'url encoded'}]} + + """ + if 'url' in stream_data: + return None + + # Merge formats and adaptiveFormats into a single list + formats = [] + if 'formats' in stream_data.keys(): + formats.extend(stream_data['formats']) + if 'adaptiveFormats' in stream_data.keys(): + formats.extend(stream_data['adaptiveFormats']) + + # Extract url and s from signatureCiphers as necessary + for data in formats: + if 'url' not in data: + if 'signatureCipher' in data: + cipher_url = parse_qs(data['signatureCipher']) + data['url'] = cipher_url['url'][0] + data['s'] = cipher_url['s'][0] + data['is_otf'] = data.get('type') == 'FORMAT_STREAM_TYPE_OTF' + + logger.debug("applying descrambler") + return formats + + +def initial_data(watch_html: str) -> str: + """Extract the ytInitialData json from the watch_html page. + + This mostly contains metadata necessary for rendering the page on-load, + such as video information, copyright notices, etc. + + @param watch_html: Html of the watch page + @return: + """ + patterns = [ + r"window\[['\"]ytInitialData['\"]]\s*=\s*", + r"ytInitialData\s*=\s*" + ] + for pattern in patterns: + try: + return parse_for_object(watch_html, pattern) + except HTMLParseError: + pass + + raise RegexMatchError(caller='initial_data', pattern='initial_data_pattern') + + +def initial_player_response(watch_html: str) -> str: + """Extract the ytInitialPlayerResponse json from the watch_html page. + + This mostly contains metadata necessary for rendering the page on-load, + such as video information, copyright notices, etc. 
+ + @param watch_html: Html of the watch page + @return: + """ + patterns = [ + r"window\[['\"]ytInitialPlayerResponse['\"]]\s*=\s*", + r"ytInitialPlayerResponse\s*=\s*" + ] + for pattern in patterns: + try: + return parse_for_object(watch_html, pattern) + except HTMLParseError: + pass + + raise RegexMatchError( + caller='initial_player_response', + pattern='initial_player_response_pattern' + ) + + +def metadata(initial_data) -> Optional[YouTubeMetadata]: + """Get the informational metadata for the video. + + e.g.: + [ + { + 'Song': '강남스타일(Gangnam Style)', + 'Artist': 'PSY', + 'Album': 'PSY SIX RULES Pt.1', + 'Licensed to YouTube by': 'YG Entertainment Inc. [...]' + } + ] + + :rtype: YouTubeMetadata + """ + try: + metadata_rows: List = initial_data["contents"]["twoColumnWatchNextResults"][ + "results"]["results"]["contents"][1]["videoSecondaryInfoRenderer"][ + "metadataRowContainer"]["metadataRowContainerRenderer"]["rows"] + except (KeyError, IndexError): + # If there's an exception accessing this data, it probably doesn't exist. + return YouTubeMetadata([]) + + # Rows appear to only have "metadataRowRenderer" or "metadataRowHeaderRenderer" + # and we only care about the former, so we filter the others + metadata_rows = filter( + lambda x: "metadataRowRenderer" in x.keys(), + metadata_rows + ) + + # We then access the metadataRowRenderer key in each element + # and build a metadata object from this new list + metadata_rows = [x["metadataRowRenderer"] for x in metadata_rows] + + return YouTubeMetadata(metadata_rows) diff --git a/pytube/helpers.py b/pytube/helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..4cf02eb413b08aeca0ffe33112f30b51705119cb --- /dev/null +++ b/pytube/helpers.py @@ -0,0 +1,335 @@ +"""Various helper functions implemented by pytube.""" +import functools +import gzip +import json +import logging +import os +import re +import warnings +from typing import Any, Callable, Dict, List, Optional, TypeVar +from urllib import request + +from pytube.exceptions import RegexMatchError + +logger = logging.getLogger(__name__) + + +class DeferredGeneratorList: + """A wrapper class for deferring list generation. + + Pytube has some continuation generators that create web calls, which means + that any time a full list is requested, all of those web calls must be + made at once, which could lead to slowdowns. This will allow individual + elements to be queried, so that slowdowns only happen as necessary. For + example, you can iterate over elements in the list without accessing them + all simultaneously. This should allow for speed improvements for playlist + and channel interactions. + """ + def __init__(self, generator): + """Construct a :class:`DeferredGeneratorList `. + + :param generator generator: + The deferrable generator to create a wrapper for. + :param func func: + (Optional) A function to call on the generator items to produce the list. + """ + self.gen = generator + self._elements = [] + + def __eq__(self, other): + """We want to mimic list behavior for comparison.""" + return list(self) == other + + def __getitem__(self, key) -> Any: + """Only generate items as they're asked for.""" + # We only allow querying with indexes. 
+        if not isinstance(key, (int, slice)):
+            raise TypeError('Key must be either a slice or int.')
+
+        # Convert int keys to slice
+        key_slice = key
+        if isinstance(key, int):
+            key_slice = slice(key, key + 1, 1)
+
+        # Generate all elements up to the final item
+        while len(self._elements) < key_slice.stop:
+            try:
+                next_item = next(self.gen)
+            except StopIteration:
+                # If we can't find enough elements for the slice, raise an IndexError
+                raise IndexError
+            else:
+                self._elements.append(next_item)
+
+        return self._elements[key]
+
+    def __iter__(self):
+        """Custom iterator for dynamically generated list."""
+        iter_index = 0
+        while True:
+            try:
+                curr_item = self[iter_index]
+            except IndexError:
+                return
+            else:
+                yield curr_item
+                iter_index += 1
+
+    def __next__(self) -> Any:
+        """Fetch next element in iterator."""
+        # ``iter_index`` is initialized lazily here; previously it was read
+        # before ever being assigned, which raised AttributeError instead of
+        # iterating.
+        iter_index = getattr(self, 'iter_index', 0)
+        try:
+            curr_element = self[iter_index]
+        except IndexError:
+            raise StopIteration
+        self.iter_index = iter_index + 1
+        return curr_element  # noqa:R504
+
+    def __len__(self) -> int:
+        """Return length of list of all items."""
+        self.generate_all()
+        return len(self._elements)
+
+    def __repr__(self) -> str:
+        """String representation of all items."""
+        self.generate_all()
+        return str(self._elements)
+
+    def __reversed__(self):
+        self.generate_all()
+        return self._elements[::-1]
+
+    def generate_all(self):
+        """Generate all items."""
+        while True:
+            try:
+                next_item = next(self.gen)
+            except StopIteration:
+                break
+            else:
+                self._elements.append(next_item)
+
+
+def regex_search(pattern: str, string: str, group: int) -> str:
+    """Shortcut method to search a string for a given pattern.
+
+    :param str pattern:
+        A regular expression pattern.
+    :param str string:
+        A target string to search.
+    :param int group:
+        Index of group to return.
+    :rtype: str
+    :returns:
+        Substring pattern matches.
+    """
+    regex = re.compile(pattern)
+    results = regex.search(string)
+    if not results:
+        raise RegexMatchError(caller="regex_search", pattern=pattern)
+
+    logger.debug("matched regex search: %s", pattern)
+
+    return results.group(group)
+
+
+def safe_filename(s: str, max_length: int = 255) -> str:
+    """Sanitize a string making it safe to use as a filename.
+
+    This function was based off the limitations outlined here:
+    https://en.wikipedia.org/wiki/Filename.
+
+    :param str s:
+        A string to make safe for use as a file name.
+    :param int max_length:
+        The maximum filename character length.
+    :rtype: str
+    :returns:
+        A sanitized string.
+    """
+    # Characters in range 0-31 (0x00-0x1F) are not allowed in ntfs filenames.
+    ntfs_characters = [chr(i) for i in range(0, 32)]
+    characters = [
+        r'"',
+        r"\#",
+        r"\$",
+        r"\%",
+        r"'",
+        r"\*",
+        r"\,",
+        r"\.",
+        r"\/",
+        r"\:",
+        r"\;",
+        r"\<",
+        r"\>",
+        r"\?",
+        r"\\",
+        r"\^",
+        r"\|",
+        r"\~",
+        r"\\\\",
+    ]
+    pattern = "|".join(ntfs_characters + characters)
+    regex = re.compile(pattern, re.UNICODE)
+    filename = regex.sub("", s)
+    return filename[:max_length].rsplit(" ", 0)[0]
+
+
+def setup_logger(level: int = logging.ERROR, log_filename: Optional[str] = None) -> None:
+    """Create a configured instance of logger.
+
+    :param int level:
+        Describe the severity level of the logs to handle.
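+    :param str log_filename:
+        (optional) A file to additionally write the logs to.
+
+    **Example** (minimal usage sketch; the log file name is arbitrary):
+
+    >>> import logging
+    >>> setup_logger(logging.DEBUG, log_filename='pytube.log')
+    >>> logging.getLogger('pytube').debug('now visible on stderr and in pytube.log')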
+ """ + fmt = "[%(asctime)s] %(levelname)s in %(module)s: %(message)s" + date_fmt = "%H:%M:%S" + formatter = logging.Formatter(fmt, datefmt=date_fmt) + + # https://github.com/pytube/pytube/issues/163 + logger = logging.getLogger("pytube") + logger.setLevel(level) + + stream_handler = logging.StreamHandler() + stream_handler.setFormatter(formatter) + logger.addHandler(stream_handler) + + if log_filename is not None: + file_handler = logging.FileHandler(log_filename) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + +GenericType = TypeVar("GenericType") + + +def cache(func: Callable[..., GenericType]) -> GenericType: + """ mypy compatible annotation wrapper for lru_cache""" + return functools.lru_cache()(func) # type: ignore + + +def deprecated(reason: str) -> Callable: + """ + This is a decorator which can be used to mark functions + as deprecated. It will result in a warning being emitted + when the function is used. + """ + + def decorator(func1): + message = "Call to deprecated function {name} ({reason})." + + @functools.wraps(func1) + def new_func1(*args, **kwargs): + warnings.simplefilter("always", DeprecationWarning) + warnings.warn( + message.format(name=func1.__name__, reason=reason), + category=DeprecationWarning, + stacklevel=2, + ) + warnings.simplefilter("default", DeprecationWarning) + return func1(*args, **kwargs) + + return new_func1 + + return decorator + + +def target_directory(output_path: Optional[str] = None) -> str: + """ + Function for determining target directory of a download. + Returns an absolute path (if relative one given) or the current + path (if none given). Makes directory if it does not exist. + + :type output_path: str + :rtype: str + :returns: + An absolute directory path as a string. + """ + if output_path: + if not os.path.isabs(output_path): + output_path = os.path.join(os.getcwd(), output_path) + else: + output_path = os.getcwd() + os.makedirs(output_path, exist_ok=True) + return output_path + + +def install_proxy(proxy_handler: Dict[str, str]) -> None: + proxy_support = request.ProxyHandler(proxy_handler) + opener = request.build_opener(proxy_support) + request.install_opener(opener) + + +def uniqueify(duped_list: List) -> List: + """Remove duplicate items from a list, while maintaining list order. + + :param List duped_list + List to remove duplicates from + + :return List result + De-duplicated list + """ + seen: Dict[Any, bool] = {} + result = [] + for item in duped_list: + if item in seen: + continue + seen[item] = True + result.append(item) + return result + + +def generate_all_html_json_mocks(): + """Regenerate the video mock json files for all current test videos. + + This should automatically output to the test/mocks directory. + """ + test_vid_ids = [ + '2lAe1cqCOXo', + '5YceQ8YqYMc', + 'irauhITDrsE', + 'm8uHb5jIGN8', + 'QRS8MkLhQmM', + 'WXxV9g7lsFE' + ] + for vid_id in test_vid_ids: + create_mock_html_json(vid_id) + + +def create_mock_html_json(vid_id) -> Dict[str, Any]: + """Generate a json.gz file with sample html responses. 
+ + :param str vid_id + YouTube video id + + :return dict data + Dict used to generate the json.gz file + """ + from pytube import YouTube + gzip_filename = 'yt-video-%s-html.json.gz' % vid_id + + # Get the pytube directory in order to navigate to /tests/mocks + pytube_dir_path = os.path.abspath( + os.path.join( + os.path.dirname(__file__), + os.path.pardir + ) + ) + pytube_mocks_path = os.path.join(pytube_dir_path, 'tests', 'mocks') + gzip_filepath = os.path.join(pytube_mocks_path, gzip_filename) + + yt = YouTube(f'https://www.youtube.com/watch?v={vid_id}') + html_data = { + 'url': yt.watch_url, + 'js': yt.js, + 'embed_html': yt.embed_html, + 'watch_html': yt.watch_html, + 'vid_info': yt.vid_info + } + + logger.info(f'Outputing json.gz file to {gzip_filepath}') + with gzip.open(gzip_filepath, 'wb') as f: + f.write(json.dumps(html_data).encode('utf-8')) + + return html_data diff --git a/pytube/innertube.py b/pytube/innertube.py new file mode 100644 index 0000000000000000000000000000000000000000..c5d940a05d2618a617b233f8f966da25c1b83699 --- /dev/null +++ b/pytube/innertube.py @@ -0,0 +1,359 @@ +"""This module is designed to interact with the innertube API. + +This module is NOT intended to be used directly by end users, as each of the +interfaces returns raw results. These should instead be parsed to extract +the useful information for the end user. +""" +# Native python imports +import json +import os +import pathlib +import time +from urllib import parse + +# Local imports +from pytube import request + +# YouTube on TV client secrets +_client_id = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com' +_client_secret = 'SboVhoG9s0rNafixCSGGKXAT' + +# Extracted API keys -- unclear what these are linked to. +_api_keys = [ + 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', + 'AIzaSyCtkvNIR1HCEwzsqK6JuE6KqpyjusIRI30', + 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w', + 'AIzaSyC8UYZpvA2eknNex0Pjid0_eTLJoDu6los', + 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw', + 'AIzaSyDHQ9ipnphqTzDqZsbtd8_Ru4_kiKVQe2k' +] + +_default_clients = { + 'WEB': { + 'context': { + 'client': { + 'clientName': 'WEB', + 'clientVersion': '2.20200720.00.02' + } + }, + 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' + }, + 'ANDROID': { + 'context': { + 'client': { + 'clientName': 'ANDROID', + 'clientVersion': '16.20' + } + }, + 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' + }, + 'WEB_EMBED': { + 'context': { + 'client': { + 'clientName': 'WEB', + 'clientVersion': '2.20210721.00.00', + 'clientScreen': 'EMBED' + } + }, + 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' + }, + 'ANDROID_EMBED': { + 'context': { + 'client': { + 'clientName': 'ANDROID', + 'clientVersion': '16.20', + 'clientScreen': 'EMBED' + } + }, + 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' + } +} +_token_timeout = 1800 +_cache_dir = pathlib.Path(__file__).parent.resolve() / '__cache__' +_token_file = os.path.join(_cache_dir, 'tokens.json') + + +class InnerTube: + """Object for interacting with the innertube API.""" + def __init__(self, client='ANDROID', use_oauth=False, allow_cache=True): + """Initialize an InnerTube object. + + :param str client: + Client to use for the object. + Default to web because it returns the most playback types. + :param bool use_oauth: + Whether or not to authenticate to YouTube. + :param bool allow_cache: + Allows caching of oauth tokens on the machine. 
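+
+        **Example** (usage sketch; ``player`` issues a real innertube
+        request, and the video id is only a placeholder):
+
+        >>> it = InnerTube(client='WEB', use_oauth=False, allow_cache=False)
+        >>> it.base_url
+        'https://www.youtube.com/youtubei/v1'
+        >>> data = it.player('2lAe1cqCOXo')  # raw player response as a dict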
+ """ + self.context = _default_clients[client]['context'] + self.api_key = _default_clients[client]['api_key'] + self.access_token = None + self.refresh_token = None + self.use_oauth = use_oauth + self.allow_cache = allow_cache + + # Stored as epoch time + self.expires = None + + # Try to load from file if specified + if self.use_oauth and self.allow_cache: + # Try to load from file if possible + if os.path.exists(_token_file): + with open(_token_file) as f: + data = json.load(f) + self.access_token = data['access_token'] + self.refresh_token = data['refresh_token'] + self.expires = data['expires'] + self.refresh_bearer_token() + + def cache_tokens(self): + """Cache tokens to file if allowed.""" + if not self.allow_cache: + return + + data = { + 'access_token': self.access_token, + 'refresh_token': self.refresh_token, + 'expires': self.expires + } + if not os.path.exists(_cache_dir): + os.mkdir(_cache_dir) + with open(_token_file, 'w') as f: + json.dump(data, f) + + def refresh_bearer_token(self, force=False): + """Refreshes the OAuth token if necessary. + + :param bool force: + Force-refresh the bearer token. + """ + if not self.use_oauth: + return + # Skip refresh if it's not necessary and not forced + if self.expires > time.time() and not force: + return + + # Subtracting 30 seconds is arbitrary to avoid potential time discrepencies + start_time = int(time.time() - 30) + data = { + 'client_id': _client_id, + 'client_secret': _client_secret, + 'grant_type': 'refresh_token', + 'refresh_token': self.refresh_token + } + response = request._execute_request( + 'https://oauth2.googleapis.com/token', + 'POST', + headers={ + 'Content-Type': 'application/json' + }, + data=data + ) + response_data = json.loads(response.read()) + + self.access_token = response_data['access_token'] + self.expires = start_time + response_data['expires_in'] + self.cache_tokens() + + def fetch_bearer_token(self): + """Fetch an OAuth token.""" + # Subtracting 30 seconds is arbitrary to avoid potential time discrepencies + start_time = int(time.time() - 30) + data = { + 'client_id': _client_id, + 'scope': 'https://www.googleapis.com/auth/youtube' + } + response = request._execute_request( + 'https://oauth2.googleapis.com/device/code', + 'POST', + headers={ + 'Content-Type': 'application/json' + }, + data=data + ) + response_data = json.loads(response.read()) + verification_url = response_data['verification_url'] + user_code = response_data['user_code'] + print(f'Please open {verification_url} and input code {user_code}') + input('Press enter when you have completed this step.') + + data = { + 'client_id': _client_id, + 'client_secret': _client_secret, + 'device_code': response_data['device_code'], + 'grant_type': 'urn:ietf:params:oauth:grant-type:device_code' + } + response = request._execute_request( + 'https://oauth2.googleapis.com/token', + 'POST', + headers={ + 'Content-Type': 'application/json' + }, + data=data + ) + response_data = json.loads(response.read()) + + self.access_token = response_data['access_token'] + self.refresh_token = response_data['refresh_token'] + self.expires = start_time + response_data['expires_in'] + self.cache_tokens() + + @property + def base_url(self): + """Return the base url endpoint for the innertube API.""" + return 'https://www.youtube.com/youtubei/v1' + + @property + def base_data(self): + """Return the base json data to transmit to the innertube API.""" + return { + 'context': self.context + } + + @property + def base_params(self): + """Return the base query parameters to transmit 
to the innertube API.""" + return { + 'key': self.api_key, + 'contentCheckOk': True, + 'racyCheckOk': True + } + + def _call_api(self, endpoint, query, data): + """Make a request to a given endpoint with the provided query parameters and data.""" + # Remove the API key if oauth is being used. + if self.use_oauth: + del query['key'] + + endpoint_url = f'{endpoint}?{parse.urlencode(query)}' + headers = { + 'Content-Type': 'application/json', + } + # Add the bearer token if applicable + if self.use_oauth: + if self.access_token: + self.refresh_bearer_token() + headers['Authorization'] = f'Bearer {self.access_token}' + else: + self.fetch_bearer_token() + headers['Authorization'] = f'Bearer {self.access_token}' + + response = request._execute_request( + endpoint_url, + 'POST', + headers=headers, + data=data + ) + return json.loads(response.read()) + + def browse(self): + """Make a request to the browse endpoint. + + TODO: Figure out how we can use this + """ + # endpoint = f'{self.base_url}/browse' # noqa:E800 + ... + # return self._call_api(endpoint, query, self.base_data) # noqa:E800 + + def config(self): + """Make a request to the config endpoint. + + TODO: Figure out how we can use this + """ + # endpoint = f'{self.base_url}/config' # noqa:E800 + ... + # return self._call_api(endpoint, query, self.base_data) # noqa:E800 + + def guide(self): + """Make a request to the guide endpoint. + + TODO: Figure out how we can use this + """ + # endpoint = f'{self.base_url}/guide' # noqa:E800 + ... + # return self._call_api(endpoint, query, self.base_data) # noqa:E800 + + def next(self): + """Make a request to the next endpoint. + + TODO: Figure out how we can use this + """ + # endpoint = f'{self.base_url}/next' # noqa:E800 + ... + # return self._call_api(endpoint, query, self.base_data) # noqa:E800 + + def player(self, video_id): + """Make a request to the player endpoint. + + :param str video_id: + The video id to get player info for. + :rtype: dict + :returns: + Raw player info results. + """ + endpoint = f'{self.base_url}/player' + query = { + 'videoId': video_id, + } + query.update(self.base_params) + return self._call_api(endpoint, query, self.base_data) + + def search(self, search_query, continuation=None): + """Make a request to the search endpoint. + + :param str search_query: + The query to search. + :rtype: dict + :returns: + Raw search query results. + """ + endpoint = f'{self.base_url}/search' + query = { + 'query': search_query + } + query.update(self.base_params) + data = {} + if continuation: + data['continuation'] = continuation + data.update(self.base_data) + return self._call_api(endpoint, query, data) + + def verify_age(self, video_id): + """Make a request to the age_verify endpoint. + + Notable examples of the types of video this verification step is for: + * https://www.youtube.com/watch?v=QLdAhwSBZ3w + * https://www.youtube.com/watch?v=hc0ZDaAZQT0 + + :param str video_id: + The video id to get player info for. + :rtype: dict + :returns: + Returns information that includes a URL for bypassing certain restrictions. + """ + endpoint = f'{self.base_url}/verify_age' + data = { + 'nextEndpoint': { + 'urlEndpoint': { + 'url': f'/watch?v={video_id}' + } + }, + 'setControvercy': True + } + data.update(self.base_data) + result = self._call_api(endpoint, self.base_params, data) + return result + + def get_transcript(self, video_id): + """Make a request to the get_transcript endpoint. + + This is likely related to captioning for videos, but is currently untested. 
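+
+        **Example** (speculative sketch, mirroring the other endpoint
+        wrappers; as noted above the endpoint is untested, so the response
+        shape should be treated as unknown):
+
+        >>> it = InnerTube()
+        >>> raw = it.get_transcript('2lAe1cqCOXo')  # performs a network request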
+ """ + endpoint = f'{self.base_url}/get_transcript' + query = { + 'videoId': video_id, + } + query.update(self.base_params) + result = self._call_api(endpoint, query, self.base_data) + return result diff --git a/pytube/itags.py b/pytube/itags.py new file mode 100644 index 0000000000000000000000000000000000000000..2f23cae8a7e5d9d8903671708174cc8428b517bb --- /dev/null +++ b/pytube/itags.py @@ -0,0 +1,144 @@ +"""This module contains a lookup table of YouTube's itag values.""" +from typing import Dict + +PROGRESSIVE_VIDEO = { + 5: ("240p", "64kbps"), + 6: ("270p", "64kbps"), + 13: ("144p", None), + 17: ("144p", "24kbps"), + 18: ("360p", "96kbps"), + 22: ("720p", "192kbps"), + 34: ("360p", "128kbps"), + 35: ("480p", "128kbps"), + 36: ("240p", None), + 37: ("1080p", "192kbps"), + 38: ("3072p", "192kbps"), + 43: ("360p", "128kbps"), + 44: ("480p", "128kbps"), + 45: ("720p", "192kbps"), + 46: ("1080p", "192kbps"), + 59: ("480p", "128kbps"), + 78: ("480p", "128kbps"), + 82: ("360p", "128kbps"), + 83: ("480p", "128kbps"), + 84: ("720p", "192kbps"), + 85: ("1080p", "192kbps"), + 91: ("144p", "48kbps"), + 92: ("240p", "48kbps"), + 93: ("360p", "128kbps"), + 94: ("480p", "128kbps"), + 95: ("720p", "256kbps"), + 96: ("1080p", "256kbps"), + 100: ("360p", "128kbps"), + 101: ("480p", "192kbps"), + 102: ("720p", "192kbps"), + 132: ("240p", "48kbps"), + 151: ("720p", "24kbps"), + 300: ("720p", "128kbps"), + 301: ("1080p", "128kbps"), +} + +DASH_VIDEO = { + # DASH Video + 133: ("240p", None), # MP4 + 134: ("360p", None), # MP4 + 135: ("480p", None), # MP4 + 136: ("720p", None), # MP4 + 137: ("1080p", None), # MP4 + 138: ("2160p", None), # MP4 + 160: ("144p", None), # MP4 + 167: ("360p", None), # WEBM + 168: ("480p", None), # WEBM + 169: ("720p", None), # WEBM + 170: ("1080p", None), # WEBM + 212: ("480p", None), # MP4 + 218: ("480p", None), # WEBM + 219: ("480p", None), # WEBM + 242: ("240p", None), # WEBM + 243: ("360p", None), # WEBM + 244: ("480p", None), # WEBM + 245: ("480p", None), # WEBM + 246: ("480p", None), # WEBM + 247: ("720p", None), # WEBM + 248: ("1080p", None), # WEBM + 264: ("1440p", None), # MP4 + 266: ("2160p", None), # MP4 + 271: ("1440p", None), # WEBM + 272: ("4320p", None), # WEBM + 278: ("144p", None), # WEBM + 298: ("720p", None), # MP4 + 299: ("1080p", None), # MP4 + 302: ("720p", None), # WEBM + 303: ("1080p", None), # WEBM + 308: ("1440p", None), # WEBM + 313: ("2160p", None), # WEBM + 315: ("2160p", None), # WEBM + 330: ("144p", None), # WEBM + 331: ("240p", None), # WEBM + 332: ("360p", None), # WEBM + 333: ("480p", None), # WEBM + 334: ("720p", None), # WEBM + 335: ("1080p", None), # WEBM + 336: ("1440p", None), # WEBM + 337: ("2160p", None), # WEBM + 394: ("144p", None), # MP4 + 395: ("240p", None), # MP4 + 396: ("360p", None), # MP4 + 397: ("480p", None), # MP4 + 398: ("720p", None), # MP4 + 399: ("1080p", None), # MP4 + 400: ("1440p", None), # MP4 + 401: ("2160p", None), # MP4 + 402: ("4320p", None), # MP4 + 571: ("4320p", None), # MP4 +} + +DASH_AUDIO = { + # DASH Audio + 139: (None, "48kbps"), # MP4 + 140: (None, "128kbps"), # MP4 + 141: (None, "256kbps"), # MP4 + 171: (None, "128kbps"), # WEBM + 172: (None, "256kbps"), # WEBM + 249: (None, "50kbps"), # WEBM + 250: (None, "70kbps"), # WEBM + 251: (None, "160kbps"), # WEBM + 256: (None, "192kbps"), # MP4 + 258: (None, "384kbps"), # MP4 + 325: (None, None), # MP4 + 328: (None, None), # MP4 +} + +ITAGS = { + **PROGRESSIVE_VIDEO, + **DASH_VIDEO, + **DASH_AUDIO, +} + +HDR = [330, 331, 332, 333, 334, 335, 336, 337] +_3D = [82, 
83, 84, 85, 100, 101, 102] +LIVE = [91, 92, 93, 94, 95, 96, 132, 151] + + +def get_format_profile(itag: int) -> Dict: + """Get additional format information for a given itag. + + :param str itag: + YouTube format identifier code. + """ + itag = int(itag) + if itag in ITAGS: + res, bitrate = ITAGS[itag] + else: + res, bitrate = None, None + return { + "resolution": res, + "abr": bitrate, + "is_live": itag in LIVE, + "is_3d": itag in _3D, + "is_hdr": itag in HDR, + "is_dash": ( + itag in DASH_AUDIO + or itag in DASH_VIDEO + ), + } diff --git a/pytube/metadata.py b/pytube/metadata.py new file mode 100644 index 0000000000000000000000000000000000000000..be12c632312a8afb8fdbad91200b95238cb49371 --- /dev/null +++ b/pytube/metadata.py @@ -0,0 +1,48 @@ +"""This module contains the YouTubeMetadata class.""" +import json +from typing import Dict, List, Optional + + +class YouTubeMetadata: + def __init__(self, metadata: List): + self._raw_metadata: List = metadata + self._metadata = [{}] + + for el in metadata: + # We only add metadata to the dict if it has a simpleText title. + if 'title' in el and 'simpleText' in el['title']: + metadata_title = el['title']['simpleText'] + else: + continue + + contents = el['contents'][0] + if 'simpleText' in contents: + self._metadata[-1][metadata_title] = contents['simpleText'] + elif 'runs' in contents: + self._metadata[-1][metadata_title] = contents['runs'][0]['text'] + + # Upon reaching a dividing line, create a new grouping + if el.get('hasDividerLine', False): + self._metadata.append({}) + + # If we happen to create an empty dict at the end, drop it + if self._metadata[-1] == {}: + self._metadata = self._metadata[:-1] + + def __getitem__(self, key): + return self._metadata[key] + + def __iter__(self): + for el in self._metadata: + yield el + + def __str__(self): + return json.dumps(self._metadata) + + @property + def raw_metadata(self) -> Optional[Dict]: + return self._raw_metadata + + @property + def metadata(self): + return self._metadata diff --git a/pytube/monostate.py b/pytube/monostate.py new file mode 100644 index 0000000000000000000000000000000000000000..7968af5fceeb1fcad31cba1fccbcb6b134ab2577 --- /dev/null +++ b/pytube/monostate.py @@ -0,0 +1,15 @@ +from typing import Any, Callable, Optional + + +class Monostate: + def __init__( + self, + on_progress: Optional[Callable[[Any, bytes, int], None]], + on_complete: Optional[Callable[[Any, Optional[str]], None]], + title: Optional[str] = None, + duration: Optional[int] = None, + ): + self.on_progress = on_progress + self.on_complete = on_complete + self.title = title + self.duration = duration diff --git a/pytube/parser.py b/pytube/parser.py new file mode 100644 index 0000000000000000000000000000000000000000..535a4b17789b34c02167043c3da1021335266950 --- /dev/null +++ b/pytube/parser.py @@ -0,0 +1,178 @@ +import ast +import json +import re +from pytube.exceptions import HTMLParseError + + +def parse_for_all_objects(html, preceding_regex): + """Parses input html to find all matches for the input starting point. + + :param str html: + HTML to be parsed for an object. + :param str preceding_regex: + Regex to find the string preceding the object. + :rtype list: + :returns: + A list of dicts created from parsing the objects. 
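+
+    **Example** (illustrative; the html string is a stand-in for a page
+    that calls ``ytcfg.set`` more than once):
+
+    >>> html = 'ytcfg.set({"a": 1}); ytcfg.set({"b": 2});'
+    >>> parse_for_all_objects(html, r'ytcfg\.set\(')
+    [{'a': 1}, {'b': 2}]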
+ """ + result = [] + regex = re.compile(preceding_regex) + match_iter = regex.finditer(html) + for match in match_iter: + if match: + start_index = match.end() + try: + obj = parse_for_object_from_startpoint(html, start_index) + except HTMLParseError: + # Some of the instances might fail because set is technically + # a method of the ytcfg object. We'll skip these since they + # don't seem relevant at the moment. + continue + else: + result.append(obj) + + if len(result) == 0: + raise HTMLParseError(f'No matches for regex {preceding_regex}') + + return result + + +def parse_for_object(html, preceding_regex): + """Parses input html to find the end of a JavaScript object. + + :param str html: + HTML to be parsed for an object. + :param str preceding_regex: + Regex to find the string preceding the object. + :rtype dict: + :returns: + A dict created from parsing the object. + """ + regex = re.compile(preceding_regex) + result = regex.search(html) + if not result: + raise HTMLParseError(f'No matches for regex {preceding_regex}') + + start_index = result.end() + return parse_for_object_from_startpoint(html, start_index) + + +def find_object_from_startpoint(html, start_point): + """Parses input html to find the end of a JavaScript object. + + :param str html: + HTML to be parsed for an object. + :param int start_point: + Index of where the object starts. + :rtype dict: + :returns: + A dict created from parsing the object. + """ + html = html[start_point:] + if html[0] not in ['{','[']: + raise HTMLParseError(f'Invalid start point. Start of HTML:\n{html[:20]}') + + # First letter MUST be a open brace, so we put that in the stack, + # and skip the first character. + stack = [html[0]] + i = 1 + + context_closers = { + '{': '}', + '[': ']', + '"': '"' + } + + while i < len(html): + if len(stack) == 0: + break + curr_char = html[i] + curr_context = stack[-1] + + # If we've reached a context closer, we can remove an element off the stack + if curr_char == context_closers[curr_context]: + stack.pop() + i += 1 + continue + + # Strings require special context handling because they can contain + # context openers *and* closers + if curr_context == '"': + # If there's a backslash in a string, we skip a character + if curr_char == '\\': + i += 2 + continue + else: + # Non-string contexts are when we need to look for context openers. + if curr_char in context_closers.keys(): + stack.append(curr_char) + + i += 1 + + full_obj = html[:i] + return full_obj # noqa: R504 + + +def parse_for_object_from_startpoint(html, start_point): + """JSONifies an object parsed from HTML. + + :param str html: + HTML to be parsed for an object. + :param int start_point: + Index of where the object starts. + :rtype dict: + :returns: + A dict created from parsing the object. + """ + full_obj = find_object_from_startpoint(html, start_point) + try: + return json.loads(full_obj) + except json.decoder.JSONDecodeError: + try: + return ast.literal_eval(full_obj) + except (ValueError, SyntaxError): + raise HTMLParseError('Could not parse object.') + + +def throttling_array_split(js_array): + """Parses the throttling array into a python list of strings. + + Expects input to begin with `[` and close with `]`. + + :param str js_array: + The javascript array, as a string. + :rtype: list: + :returns: + A list of strings representing splits on `,` in the throttling array. 
+ """ + results = [] + curr_substring = js_array[1:] + + comma_regex = re.compile(r",") + func_regex = re.compile(r"function\([^)]*\)") + + while len(curr_substring) > 0: + if curr_substring.startswith('function'): + # Handle functions separately. These can contain commas + match = func_regex.search(curr_substring) + match_start, match_end = match.span() + + function_text = find_object_from_startpoint(curr_substring, match.span()[1]) + full_function_def = curr_substring[:match_end + len(function_text)] + results.append(full_function_def) + curr_substring = curr_substring[len(full_function_def) + 1:] + else: + match = comma_regex.search(curr_substring) + + # Try-catch to capture end of array + try: + match_start, match_end = match.span() + except AttributeError: + match_start = len(curr_substring) - 1 + match_end = match_start + 1 + + curr_el = curr_substring[:match_start] + results.append(curr_el) + curr_substring = curr_substring[match_end:] + + return results diff --git a/pytube/query.py b/pytube/query.py new file mode 100644 index 0000000000000000000000000000000000000000..d4878ba86c5a12c035b843b13a17aba19f511236 --- /dev/null +++ b/pytube/query.py @@ -0,0 +1,421 @@ +"""This module provides a query interface for media streams and captions.""" +from collections.abc import Mapping, Sequence +from typing import Callable, List, Optional, Union + +from pytube import Caption, Stream +from pytube.helpers import deprecated + + +class StreamQuery(Sequence): + """Interface for querying the available media streams.""" + + def __init__(self, fmt_streams): + """Construct a :class:`StreamQuery `. + + param list fmt_streams: + list of :class:`Stream ` instances. + """ + self.fmt_streams = fmt_streams + self.itag_index = {int(s.itag): s for s in fmt_streams} + + def filter( + self, + fps=None, + res=None, + resolution=None, + mime_type=None, + type=None, + subtype=None, + file_extension=None, + abr=None, + bitrate=None, + video_codec=None, + audio_codec=None, + only_audio=None, + only_video=None, + progressive=None, + adaptive=None, + is_dash=None, + custom_filter_functions=None, + ): + """Apply the given filtering criterion. + + :param fps: + (optional) The frames per second. + :type fps: + int or None + + :param resolution: + (optional) Alias to ``res``. + :type res: + str or None + + :param res: + (optional) The video resolution. + :type resolution: + str or None + + :param mime_type: + (optional) Two-part identifier for file formats and format contents + composed of a "type", a "subtype". + :type mime_type: + str or None + + :param type: + (optional) Type part of the ``mime_type`` (e.g.: audio, video). + :type type: + str or None + + :param subtype: + (optional) Sub-type part of the ``mime_type`` (e.g.: mp4, mov). + :type subtype: + str or None + + :param file_extension: + (optional) Alias to ``sub_type``. + :type file_extension: + str or None + + :param abr: + (optional) Average bitrate (ABR) refers to the average amount of + data transferred per unit of time (e.g.: 64kbps, 192kbps). + :type abr: + str or None + + :param bitrate: + (optional) Alias to ``abr``. + :type bitrate: + str or None + + :param video_codec: + (optional) Video compression format. + :type video_codec: + str or None + + :param audio_codec: + (optional) Audio compression format. + :type audio_codec: + str or None + + :param bool progressive: + Excludes adaptive streams (one file contains both audio and video + tracks). + + :param bool adaptive: + Excludes progressive streams (audio and video are on separate + tracks). 
+ + :param bool is_dash: + Include/exclude dash streams. + + :param bool only_audio: + Excludes streams with video tracks. + + :param bool only_video: + Excludes streams with audio tracks. + + :param custom_filter_functions: + (optional) Interface for defining complex filters without + subclassing. + :type custom_filter_functions: + list or None + + """ + filters = [] + if res or resolution: + filters.append(lambda s: s.resolution == (res or resolution)) + + if fps: + filters.append(lambda s: s.fps == fps) + + if mime_type: + filters.append(lambda s: s.mime_type == mime_type) + + if type: + filters.append(lambda s: s.type == type) + + if subtype or file_extension: + filters.append(lambda s: s.subtype == (subtype or file_extension)) + + if abr or bitrate: + filters.append(lambda s: s.abr == (abr or bitrate)) + + if video_codec: + filters.append(lambda s: s.video_codec == video_codec) + + if audio_codec: + filters.append(lambda s: s.audio_codec == audio_codec) + + if only_audio: + filters.append( + lambda s: ( + s.includes_audio_track and not s.includes_video_track + ), + ) + + if only_video: + filters.append( + lambda s: ( + s.includes_video_track and not s.includes_audio_track + ), + ) + + if progressive: + filters.append(lambda s: s.is_progressive) + + if adaptive: + filters.append(lambda s: s.is_adaptive) + + if custom_filter_functions: + filters.extend(custom_filter_functions) + + if is_dash is not None: + filters.append(lambda s: s.is_dash == is_dash) + + return self._filter(filters) + + def _filter(self, filters: List[Callable]) -> "StreamQuery": + fmt_streams = self.fmt_streams + for filter_lambda in filters: + fmt_streams = filter(filter_lambda, fmt_streams) + return StreamQuery(list(fmt_streams)) + + def order_by(self, attribute_name: str) -> "StreamQuery": + """Apply a sort order. Filters out stream the do not have the attribute. + + :param str attribute_name: + The name of the attribute to sort by. + """ + has_attribute = [ + s + for s in self.fmt_streams + if getattr(s, attribute_name) is not None + ] + # Check that the attributes have string values. + if has_attribute and isinstance( + getattr(has_attribute[0], attribute_name), str + ): + # Try to return a StreamQuery sorted by the integer representations + # of the values. + try: + return StreamQuery( + sorted( + has_attribute, + key=lambda s: int( + "".join( + filter(str.isdigit, getattr(s, attribute_name)) + ) + ), # type: ignore # noqa: E501 + ) + ) + except ValueError: + pass + + return StreamQuery( + sorted(has_attribute, key=lambda s: getattr(s, attribute_name)) + ) + + def desc(self) -> "StreamQuery": + """Sort streams in descending order. + + :rtype: :class:`StreamQuery ` + + """ + return StreamQuery(self.fmt_streams[::-1]) + + def asc(self) -> "StreamQuery": + """Sort streams in ascending order. + + :rtype: :class:`StreamQuery ` + + """ + return self + + def get_by_itag(self, itag: int) -> Optional[Stream]: + """Get the corresponding :class:`Stream ` for a given itag. + + :param int itag: + YouTube format identifier code. + :rtype: :class:`Stream ` or None + :returns: + The :class:`Stream ` matching the given itag or None if + not found. + + """ + return self.itag_index.get(int(itag)) + + def get_by_resolution(self, resolution: str) -> Optional[Stream]: + """Get the corresponding :class:`Stream ` for a given resolution. + + Stream must be a progressive mp4. + + :param str resolution: + Video resolution i.e. 
"720p", "480p", "360p", "240p", "144p" + :rtype: :class:`Stream ` or None + :returns: + The :class:`Stream ` matching the given itag or None if + not found. + + """ + return self.filter( + progressive=True, subtype="mp4", resolution=resolution + ).first() + + def get_lowest_resolution(self) -> Optional[Stream]: + """Get lowest resolution stream that is a progressive mp4. + + :rtype: :class:`Stream ` or None + :returns: + The :class:`Stream ` matching the given itag or None if + not found. + + """ + return ( + self.filter(progressive=True, subtype="mp4") + .order_by("resolution") + .first() + ) + + def get_highest_resolution(self) -> Optional[Stream]: + """Get highest resolution stream that is a progressive video. + + :rtype: :class:`Stream ` or None + :returns: + The :class:`Stream ` matching the given itag or None if + not found. + + """ + return self.filter(progressive=True).order_by("resolution").last() + + def get_audio_only(self, subtype: str = "mp4") -> Optional[Stream]: + """Get highest bitrate audio stream for given codec (defaults to mp4) + + :param str subtype: + Audio subtype, defaults to mp4 + :rtype: :class:`Stream ` or None + :returns: + The :class:`Stream ` matching the given itag or None if + not found. + """ + return ( + self.filter(only_audio=True, subtype=subtype) + .order_by("abr") + .last() + ) + + def otf(self, is_otf: bool = False) -> "StreamQuery": + """Filter stream by OTF, useful if some streams have 404 URLs + + :param bool is_otf: Set to False to retrieve only non-OTF streams + :rtype: :class:`StreamQuery ` + :returns: A StreamQuery object with otf filtered streams + """ + return self._filter([lambda s: s.is_otf == is_otf]) + + def first(self) -> Optional[Stream]: + """Get the first :class:`Stream ` in the results. + + :rtype: :class:`Stream ` or None + :returns: + the first result of this query or None if the result doesn't + contain any streams. + + """ + try: + return self.fmt_streams[0] + except IndexError: + return None + + def last(self): + """Get the last :class:`Stream ` in the results. + + :rtype: :class:`Stream ` or None + :returns: + Return the last result of this query or None if the result + doesn't contain any streams. + + """ + try: + return self.fmt_streams[-1] + except IndexError: + pass + + @deprecated("Get the size of this list directly using len()") + def count(self, value: Optional[str] = None) -> int: # pragma: no cover + """Get the count of items in the list. + + :rtype: int + """ + if value: + return self.fmt_streams.count(value) + + return len(self) + + @deprecated("This object can be treated as a list, all() is useless") + def all(self) -> List[Stream]: # pragma: no cover + """Get all the results represented by this query as a list. + + :rtype: list + + """ + return self.fmt_streams + + def __getitem__(self, i: Union[slice, int]): + return self.fmt_streams[i] + + def __len__(self) -> int: + return len(self.fmt_streams) + + def __repr__(self) -> str: + return f"{self.fmt_streams}" + + +class CaptionQuery(Mapping): + """Interface for querying the available captions.""" + + def __init__(self, captions: List[Caption]): + """Construct a :class:`Caption `. + + param list captions: + list of :class:`Caption ` instances. + + """ + self.lang_code_index = {c.code: c for c in captions} + + @deprecated( + "This object can be treated as a dictionary, i.e. captions['en']" + ) + def get_by_language_code( + self, lang_code: str + ) -> Optional[Caption]: # pragma: no cover + """Get the :class:`Caption ` for a given ``lang_code``. 
+ + :param str lang_code: + The code that identifies the caption language. + :rtype: :class:`Caption ` or None + :returns: + The :class:`Caption ` matching the given ``lang_code`` or + None if it does not exist. + """ + return self.lang_code_index.get(lang_code) + + @deprecated("This object can be treated as a dictionary") + def all(self) -> List[Caption]: # pragma: no cover + """Get all the results represented by this query as a list. + + :rtype: list + + """ + return list(self.lang_code_index.values()) + + def __getitem__(self, i: str): + return self.lang_code_index[i] + + def __len__(self) -> int: + return len(self.lang_code_index) + + def __iter__(self): + return iter(self.lang_code_index.values()) + + def __repr__(self) -> str: + return f"{self.lang_code_index}" diff --git a/pytube/request.py b/pytube/request.py new file mode 100644 index 0000000000000000000000000000000000000000..e66a4642da68c0f9b121498ad0d71534741eee30 --- /dev/null +++ b/pytube/request.py @@ -0,0 +1,265 @@ +"""Implements a simple wrapper around urlopen.""" +import http.client +import json +import logging +import re +import socket +from functools import lru_cache +from urllib import parse +from urllib.error import URLError +from urllib.request import Request, urlopen + +from pytube.exceptions import RegexMatchError, MaxRetriesExceeded +from pytube.helpers import regex_search + +logger = logging.getLogger(__name__) +default_range_size = 9437184 # 9MB + + +def _execute_request( + url, + method=None, + headers=None, + data=None, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT +): + base_headers = {"User-Agent": "Mozilla/5.0", "accept-language": "en-US,en"} + if headers: + base_headers.update(headers) + if data: + # encode data for request + if not isinstance(data, bytes): + data = bytes(json.dumps(data), encoding="utf-8") + if url.lower().startswith("http"): + request = Request(url, headers=base_headers, method=method, data=data) + else: + raise ValueError("Invalid URL") + return urlopen(request, timeout=timeout) # nosec + + +def get(url, extra_headers=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + """Send an http GET request. + + :param str url: + The URL to perform the GET request for. + :param dict extra_headers: + Extra headers to add to the request + :rtype: str + :returns: + UTF-8 encoded string of response + """ + if extra_headers is None: + extra_headers = {} + response = _execute_request(url, headers=extra_headers, timeout=timeout) + return response.read().decode("utf-8") + + +def post(url, extra_headers=None, data=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT): + """Send an http POST request. + + :param str url: + The URL to perform the POST request for. + :param dict extra_headers: + Extra headers to add to the request + :param dict data: + The data to send on the POST request + :rtype: str + :returns: + UTF-8 encoded string of response + """ + # could technically be implemented in get, + # but to avoid confusion implemented like this + if extra_headers is None: + extra_headers = {} + if data is None: + data = {} + # required because the youtube servers are strict on content type + # raises HTTPError [400]: Bad Request otherwise + extra_headers.update({"Content-Type": "application/json"}) + response = _execute_request( + url, + headers=extra_headers, + data=data, + timeout=timeout + ) + return response.read().decode("utf-8") + + +def seq_stream( + url, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + max_retries=0 +): + """Read the response in sequence. + :param str url: The URL to perform the GET request for. 
+ :rtype: Iterable[bytes] + """ + # YouTube expects a request sequence number as part of the parameters. + split_url = parse.urlsplit(url) + base_url = '%s://%s/%s?' % (split_url.scheme, split_url.netloc, split_url.path) + + querys = dict(parse.parse_qsl(split_url.query)) + + # The 0th sequential request provides the file headers, which tell us + # information about how the file is segmented. + querys['sq'] = 0 + url = base_url + parse.urlencode(querys) + + segment_data = b'' + for chunk in stream(url, timeout=timeout, max_retries=max_retries): + yield chunk + segment_data += chunk + + # We can then parse the header to find the number of segments + stream_info = segment_data.split(b'\r\n') + segment_count_pattern = re.compile(b'Segment-Count: (\\d+)') + for line in stream_info: + match = segment_count_pattern.search(line) + if match: + segment_count = int(match.group(1).decode('utf-8')) + + # We request these segments sequentially to build the file. + seq_num = 1 + while seq_num <= segment_count: + # Create sequential request URL + querys['sq'] = seq_num + url = base_url + parse.urlencode(querys) + + yield from stream(url, timeout=timeout, max_retries=max_retries) + seq_num += 1 + return # pylint: disable=R1711 + + +def stream( + url, + timeout=socket._GLOBAL_DEFAULT_TIMEOUT, + max_retries=0 +): + """Read the response in chunks. + :param str url: The URL to perform the GET request for. + :rtype: Iterable[bytes] + """ + file_size: int = default_range_size # fake filesize to start + downloaded = 0 + while downloaded < file_size: + stop_pos = min(downloaded + default_range_size, file_size) - 1 + range_header = f"bytes={downloaded}-{stop_pos}" + tries = 0 + + # Attempt to make the request multiple times as necessary. + while True: + # If the max retries is exceeded, raise an exception + if tries >= 1 + max_retries: + raise MaxRetriesExceeded() + + # Try to execute the request, ignoring socket timeouts + try: + response = _execute_request( + url, + method="GET", + headers={"Range": range_header}, + timeout=timeout + ) + except URLError as e: + # We only want to skip over timeout errors, and + # raise any other URLError exceptions + if isinstance(e.reason, socket.timeout): + pass + else: + raise + except http.client.IncompleteRead: + # Allow retries on IncompleteRead errors for unreliable connections + pass + else: + # On a successful request, break from loop + break + tries += 1 + + if file_size == default_range_size: + try: + content_range = response.info()["Content-Range"] + file_size = int(content_range.split("/")[1]) + except (KeyError, IndexError, ValueError) as e: + logger.error(e) + while True: + chunk = response.read() + if not chunk: + break + downloaded += len(chunk) + yield chunk + return # pylint: disable=R1711 + + +@lru_cache() +def filesize(url): + """Fetch size in bytes of file at given URL + + :param str url: The URL to get the size of + :returns: int: size in bytes of remote file + """ + return int(head(url)["content-length"]) + + +@lru_cache() +def seq_filesize(url): + """Fetch size in bytes of file at given URL from sequential requests + + :param str url: The URL to get the size of + :returns: int: size in bytes of remote file + """ + total_filesize = 0 + # YouTube expects a request sequence number as part of the parameters. + split_url = parse.urlsplit(url) + base_url = '%s://%s/%s?' 
% (split_url.scheme, split_url.netloc, split_url.path) + querys = dict(parse.parse_qsl(split_url.query)) + + # The 0th sequential request provides the file headers, which tell us + # information about how the file is segmented. + querys['sq'] = 0 + url = base_url + parse.urlencode(querys) + response = _execute_request( + url, method="GET" + ) + + response_value = response.read() + # The file header must be added to the total filesize + total_filesize += len(response_value) + + # We can then parse the header to find the number of segments + segment_count = 0 + stream_info = response_value.split(b'\r\n') + segment_regex = b'Segment-Count: (\\d+)' + for line in stream_info: + # One of the lines should contain the segment count, but we don't know + # which, so we need to iterate through the lines to find it + try: + segment_count = int(regex_search(segment_regex, line, 1)) + except RegexMatchError: + pass + + if segment_count == 0: + raise RegexMatchError('seq_filesize', segment_regex) + + # We make HEAD requests to the segments sequentially to find the total filesize. + seq_num = 1 + while seq_num <= segment_count: + # Create sequential request URL + querys['sq'] = seq_num + url = base_url + parse.urlencode(querys) + + total_filesize += int(head(url)['content-length']) + seq_num += 1 + return total_filesize + + +def head(url): + """Fetch headers returned http GET request. + + :param str url: + The URL to perform the GET request for. + :rtype: dict + :returns: + dictionary of lowercase headers + """ + response_headers = _execute_request(url, method="HEAD").info() + return {k.lower(): v for k, v in response_headers.items()} diff --git a/pytube/streams.py b/pytube/streams.py new file mode 100644 index 0000000000000000000000000000000000000000..05ec6c1afc4cc5c9f9c723024d8e7febb353ade1 --- /dev/null +++ b/pytube/streams.py @@ -0,0 +1,374 @@ +""" +This module contains a container for stream manifest data. + +A container object for the media stream (video only / audio only / video+audio +combined). This was referred to as ``Video`` in the legacy pytube version, but +has been renamed to accommodate DASH (which serves the audio and video +separately). +""" +import logging +import os +from datetime import datetime +from typing import BinaryIO, Dict, Optional, Tuple +from urllib.error import HTTPError +from urllib.parse import parse_qs + +from pytube import extract, request +from pytube.helpers import safe_filename, target_directory +from pytube.itags import get_format_profile +from pytube.monostate import Monostate + +logger = logging.getLogger(__name__) + + +class Stream: + """Container for stream manifest data.""" + + def __init__( + self, stream: Dict, monostate: Monostate + ): + """Construct a :class:`Stream `. + + :param dict stream: + The unscrambled data extracted from YouTube. + :param dict monostate: + Dictionary of data shared across all instances of + :class:`Stream `. + """ + # A dictionary shared between all instances of :class:`Stream ` + # (Borg pattern). + self._monostate = monostate + + self.url = stream["url"] # signed download url + self.itag = int( + stream["itag"] + ) # stream format id (youtube nomenclature) + + # set type and codec info + + # 'video/webm; codecs="vp8, vorbis"' -> 'video/webm', ['vp8', 'vorbis'] + self.mime_type, self.codecs = extract.mime_type_codec(stream["mimeType"]) + + # 'video/webm' -> 'video', 'webm' + self.type, self.subtype = self.mime_type.split("/") + + # ['vp8', 'vorbis'] -> video_codec: vp8, audio_codec: vorbis. 
DASH + # streams return NoneType for audio/video depending. + self.video_codec, self.audio_codec = self.parse_codecs() + + self.is_otf: bool = stream["is_otf"] + self.bitrate: Optional[int] = stream["bitrate"] + + # filesize in bytes + self._filesize: Optional[int] = int(stream.get('contentLength', 0)) + + # Additional information about the stream format, such as resolution, + # frame rate, and whether the stream is live (HLS) or 3D. + itag_profile = get_format_profile(self.itag) + self.is_dash = itag_profile["is_dash"] + self.abr = itag_profile["abr"] # average bitrate (audio streams only) + if 'fps' in stream: + self.fps = stream['fps'] # Video streams only + self.resolution = itag_profile[ + "resolution" + ] # resolution (e.g.: "480p") + self.is_3d = itag_profile["is_3d"] + self.is_hdr = itag_profile["is_hdr"] + self.is_live = itag_profile["is_live"] + + @property + def is_adaptive(self) -> bool: + """Whether the stream is DASH. + + :rtype: bool + """ + # if codecs has two elements (e.g.: ['vp8', 'vorbis']): 2 % 2 = 0 + # if codecs has one element (e.g.: ['vp8']) 1 % 2 = 1 + return bool(len(self.codecs) % 2) + + @property + def is_progressive(self) -> bool: + """Whether the stream is progressive. + + :rtype: bool + """ + return not self.is_adaptive + + @property + def includes_audio_track(self) -> bool: + """Whether the stream only contains audio. + + :rtype: bool + """ + return self.is_progressive or self.type == "audio" + + @property + def includes_video_track(self) -> bool: + """Whether the stream only contains video. + + :rtype: bool + """ + return self.is_progressive or self.type == "video" + + def parse_codecs(self) -> Tuple[Optional[str], Optional[str]]: + """Get the video/audio codecs from list of codecs. + + Parse a variable length sized list of codecs and returns a + constant two element tuple, with the video codec as the first element + and audio as the second. Returns None if one is not available + (adaptive only). + + :rtype: tuple + :returns: + A two element tuple with audio and video codecs. + + """ + video = None + audio = None + if not self.is_adaptive: + video, audio = self.codecs + elif self.includes_video_track: + video = self.codecs[0] + elif self.includes_audio_track: + audio = self.codecs[0] + return video, audio + + @property + def filesize(self) -> int: + """File size of the media stream in bytes. + + :rtype: int + :returns: + Filesize (in bytes) of the stream. + """ + if self._filesize == 0: + try: + self._filesize = request.filesize(self.url) + except HTTPError as e: + if e.code != 404: + raise + self._filesize = request.seq_filesize(self.url) + return self._filesize + + @property + def title(self) -> str: + """Get title of video + + :rtype: str + :returns: + Youtube video title + """ + return self._monostate.title or "Unknown YouTube Video Title" + + @property + def filesize_approx(self) -> int: + """Get approximate filesize of the video + + Falls back to HTTP call if there is not sufficient information to approximate + + :rtype: int + :returns: size of video in bytes + """ + if self._monostate.duration and self.bitrate: + bits_in_byte = 8 + return int( + (self._monostate.duration * self.bitrate) / bits_in_byte + ) + + return self.filesize + + @property + def expiration(self) -> datetime: + expire = parse_qs(self.url.split("?")[1])["expire"][0] + return datetime.utcfromtimestamp(int(expire)) + + @property + def default_filename(self) -> str: + """Generate filename based on the video title. + + :rtype: str + :returns: + An os file system compatible filename. 
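+
+        **Example** (sketch; assumes the monostate title is "My Video" and
+        the stream subtype is "mp4"):
+
+        >>> stream.default_filename
+        'My Video.mp4'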
+ """ + filename = safe_filename(self.title) + return f"{filename}.{self.subtype}" + + def download( + self, + output_path: Optional[str] = None, + filename: Optional[str] = None, + filename_prefix: Optional[str] = None, + skip_existing: bool = True, + timeout: Optional[int] = None, + max_retries: Optional[int] = 0 + ) -> str: + """Write the media stream to disk. + + :param output_path: + (optional) Output path for writing media file. If one is not + specified, defaults to the current working directory. + :type output_path: str or None + :param filename: + (optional) Output filename (stem only) for writing media file. + If one is not specified, the default filename is used. + :type filename: str or None + :param filename_prefix: + (optional) A string that will be prepended to the filename. + For example a number in a playlist or the name of a series. + If one is not specified, nothing will be prepended + This is separate from filename so you can use the default + filename but still add a prefix. + :type filename_prefix: str or None + :param skip_existing: + (optional) Skip existing files, defaults to True + :type skip_existing: bool + :param timeout: + (optional) Request timeout length in seconds. Uses system default. + :type timeout: int + :param max_retries: + (optional) Number of retries to attempt after socket timeout. Defaults to 0. + :type max_retries: int + :returns: + Path to the saved video + :rtype: str + + """ + file_path = self.get_file_path( + filename=filename, + output_path=output_path, + filename_prefix=filename_prefix, + ) + + if skip_existing and self.exists_at_path(file_path): + logger.debug(f'file {file_path} already exists, skipping') + self.on_complete(file_path) + return file_path + + bytes_remaining = self.filesize + logger.debug(f'downloading ({self.filesize} total bytes) file to {file_path}') + + with open(file_path, "wb") as fh: + try: + for chunk in request.stream( + self.url, + timeout=timeout, + max_retries=max_retries + ): + # reduce the (bytes) remainder by the length of the chunk. + bytes_remaining -= len(chunk) + # send to the on_progress callback. + self.on_progress(chunk, fh, bytes_remaining) + except HTTPError as e: + if e.code != 404: + raise + # Some adaptive streams need to be requested with sequence numbers + for chunk in request.seq_stream( + self.url, + timeout=timeout, + max_retries=max_retries + ): + # reduce the (bytes) remainder by the length of the chunk. + bytes_remaining -= len(chunk) + # send to the on_progress callback. + self.on_progress(chunk, fh, bytes_remaining) + self.on_complete(file_path) + return file_path + + def get_file_path( + self, + filename: Optional[str] = None, + output_path: Optional[str] = None, + filename_prefix: Optional[str] = None, + ) -> str: + if not filename: + filename = self.default_filename + if filename_prefix: + filename = f"{filename_prefix}{filename}" + return os.path.join(target_directory(output_path), filename) + + def exists_at_path(self, file_path: str) -> bool: + return ( + os.path.isfile(file_path) + and os.path.getsize(file_path) == self.filesize + ) + + def stream_to_buffer(self, buffer: BinaryIO) -> None: + """Write the media stream to buffer + + :rtype: io.BytesIO buffer + """ + bytes_remaining = self.filesize + logger.info( + "downloading (%s total bytes) file to buffer", self.filesize, + ) + + for chunk in request.stream(self.url): + # reduce the (bytes) remainder by the length of the chunk. + bytes_remaining -= len(chunk) + # send to the on_progress callback. 
+
+    def get_file_path(
+        self,
+        filename: Optional[str] = None,
+        output_path: Optional[str] = None,
+        filename_prefix: Optional[str] = None,
+    ) -> str:
+        if not filename:
+            filename = self.default_filename
+        if filename_prefix:
+            filename = f"{filename_prefix}{filename}"
+        return os.path.join(target_directory(output_path), filename)
+
+    def exists_at_path(self, file_path: str) -> bool:
+        return (
+            os.path.isfile(file_path)
+            and os.path.getsize(file_path) == self.filesize
+        )
+
+    def stream_to_buffer(self, buffer: BinaryIO) -> None:
+        """Write the media stream to a buffer.
+
+        :rtype: None
+        """
+        bytes_remaining = self.filesize
+        logger.info(
+            "downloading (%s total bytes) file to buffer", self.filesize,
+        )
+
+        for chunk in request.stream(self.url):
+            # reduce the (bytes) remainder by the length of the chunk.
+            bytes_remaining -= len(chunk)
+            # send to the on_progress callback.
+            self.on_progress(chunk, buffer, bytes_remaining)
+        self.on_complete(None)
+
+    def on_progress(
+        self, chunk: bytes, file_handler: BinaryIO, bytes_remaining: int
+    ):
+        """On progress callback function.
+
+        This function writes the binary data to the file, then checks if an
+        additional callback is defined in the monostate. This is exposed to
+        allow things like displaying a progress bar.
+
+        :param bytes chunk:
+            Segment of media file binary data, not yet written to disk.
+        :param file_handler:
+            The file handle where the media is being written to.
+        :type file_handler:
+            :py:class:`io.BufferedWriter`
+        :param int bytes_remaining:
+            The delta between the total file size in bytes and the amount
+            already downloaded.
+
+        :rtype: None
+
+        """
+        file_handler.write(chunk)
+        logger.debug("download remaining: %s", bytes_remaining)
+        if self._monostate.on_progress:
+            self._monostate.on_progress(self, chunk, bytes_remaining)
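+    # The monostate hook checked in on_progress above is what
+    # YouTube.register_on_progress_callback() wires up; a minimal progress
+    # display could look like this (sketch):
+    #   def show_progress(stream, chunk, bytes_remaining):
+    #       done = stream.filesize - bytes_remaining
+    #       print(f'{done / stream.filesize:.1%} downloaded')
+    #   yt.register_on_progress_callback(show_progress)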
+
+    def on_complete(self, file_path: Optional[str]):
+        """On download complete handler function.
+
+        :param file_path:
+            The path to the downloaded file, or None when streaming to a
+            buffer.
+        :type file_path: str
+
+        :rtype: None
+
+        """
+        logger.debug("download finished")
+        on_complete = self._monostate.on_complete
+        if on_complete:
+            logger.debug("calling on_complete callback %s", on_complete)
+            on_complete(self, file_path)
+
+    def __repr__(self) -> str:
+        """Printable object representation.
+
+        :rtype: str
+        :returns:
+            A string representation of a :class:`Stream <Stream>` object.
+        """
+        parts = ['itag="{s.itag}"', 'mime_type="{s.mime_type}"']
+        if self.includes_video_track:
+            parts.extend(['res="{s.resolution}"', 'fps="{s.fps}fps"'])
+            if not self.is_adaptive:
+                parts.extend(
+                    ['vcodec="{s.video_codec}"', 'acodec="{s.audio_codec}"']
+                )
+            else:
+                parts.extend(['vcodec="{s.video_codec}"'])
+        else:
+            parts.extend(['abr="{s.abr}"', 'acodec="{s.audio_codec}"'])
+        parts.extend(['progressive="{s.is_progressive}"', 'type="{s.type}"'])
+        return f"<Stream: {' '.join(parts).format(s=self)}>"
diff --git a/pytube/version.py b/pytube/version.py
new file mode 100644
index 0000000000000000000000000000000000000000..facad8adb3095438f19645eb554baa6896eab434
--- /dev/null
+++ b/pytube/version.py
@@ -0,0 +1,4 @@
+__version__ = "12.0.0.123"
+
+if __name__ == "__main__":
+    print(__version__)
diff --git a/repunct.py b/repunct.py
new file mode 100644
index 0000000000000000000000000000000000000000..57f352201085afeb72e8e4f1af76d3ab0d7faa51
--- /dev/null
+++ b/repunct.py
@@ -0,0 +1,7 @@
+from myrpunct import RestorePuncts
+
+def predict(input_text):
+    rpunct = RestorePuncts()
+    output_text = rpunct.punctuate(input_text)
+    print("Punctuation finished...")
+    return output_text
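+
+# Note: predict() instantiates RestorePuncts, i.e. loads the punctuation
+# model, on every call. If the model is safe to reuse (an assumption, not
+# verified here), a module-level instance would avoid the reload:
+#   _rpunct = RestorePuncts()
+#   def predict(input_text):
+#       return _rpunct.punctuate(input_text)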
diff --git a/summarizer.py b/summarizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..171294c94e3f19ff4dea4ff923e4af24e2e18ed1
--- /dev/null
+++ b/summarizer.py
@@ -0,0 +1,153 @@
+import transcript as ts
+import ytvideo as vd
+import frames as fr
+import repunct as rp  # needed below when rpunkt_switch is set
+import lexrank as lr
+
+# import sys
+# del sys.modules['ytvideo']
+# del sys.modules['transcript']
+# del sys.modules['frames']
+# del sys.modules['lexrank']
+
+
+#########################################################################
+# LEXRANK SUMMARY
+#########################################################################
+
+def getSummaryImage(link, lexrank_switch, rpunkt_switch):
+
+    # cleanup the working directory
+    #result = fr.removeFilesInWorkdir()
+    #print('removeFilesInWorkdir result: ',result)
+
+    if len(link) == 0:
+        return 'Error: no link provided'
+
+    print('getting transcript using link: ', link)
+    raw_transcript, type_transcript = ts.get_json_transcript(link,rpunkt_switch)
+    print('transcript type: ', type_transcript)
+    #timestamps = ts.get_timestamps(raw_transcript)
+    raw_caption = ts.get_caption(raw_transcript)
+
+    # module rpunct:
+    # restore punctuation from the raw captions, if necessary
+    pnct_raw_transcript = raw_transcript
+    pnct_caption = raw_caption
+
+    dict_sentences = ts.getSentences(pnct_raw_transcript)
+
+    concat_list_summary = 'empty'
+    if lexrank_switch:
+        # summarize a small part of the text
+        nr_sentences = round(len(dict_sentences)*0.05)
+        print('nr_sentences: '+str(nr_sentences))
+        if nr_sentences == 0:
+            return 'Error: No sentences available'
+        trunc_pnct_caption = ' '.join(dict_sentences.values())
+        list_summary = lr.getSummary(trunc_pnct_caption,nr_sentences)
+        # a lexrank "sentence" can consist of multiple actual sentences,
+        # separated by full stops; then the corresponding timestamp cannot
+        # be found. All items from the lexrank summary are therefore
+        # concatenated and split up by full stops again.
+        concat_list_summary = '. '.join([str(item) for item in list_summary]).split('. ')
+    else:
+        concat_list_summary = [*dict_sentences.values()]
+
+    dict_timestamp_summary = ts.getTimestampAtFrameFromSummary(pnct_raw_transcript,dict_sentences,concat_list_summary)
+    if 'Error' in dict_timestamp_summary:
+        return dict_timestamp_summary
+
+    result_get_video=vd.get_video(link)
+    print('video: '+result_get_video)
+
+    proc_list = fr.extractImagesFromVideo(dict_timestamp_summary.keys())
+    print('frames: '+str(proc_list))
+
+    images = ts.getImages(dict_timestamp_summary)
+
+    return images
+
+
+def getSummary(link, lexrank_switch, rpunkt_switch):
+
+    # cleanup the working directory
+    #result = fr.removeFilesInWorkdir()
+    #print('removeFilesInWorkdir result: ',result)
+
+    if len(link) == 0:
+        return 'Error: no link provided', None
+
+    print('getting transcript using link: ', link)
+    raw_transcript, type_transcript = ts.get_json_transcript(link,rpunkt_switch)
+    print('transcript type: ', type_transcript)
+    #timestamps = ts.get_timestamps(raw_transcript)
+    raw_caption = ts.get_caption(raw_transcript)
+
+    # module rpunct:
+    # restore punctuation from the raw captions, if necessary
+    pnct_raw_transcript = raw_transcript
+    pnct_caption = raw_caption
+
+    if rpunkt_switch:
+        #if type_transcript[0] == 'en':
+        # the variable type_transcript[1] contains the text 'generated' or 'translated'
+        print('Recovering punctuation from english text...', type_transcript[1])
+        # remove punctuation leftovers
+        #clean_raw_caption = re.sub('[,?.!]','',raw_caption)
+        caption = rp.predict(raw_caption)
+        pnct_caption = ts.restore_cr(raw_caption,caption)
+        pnct_raw_transcript = ts.replacePunctuatedText(raw_transcript, pnct_caption)
+
+    dict_sentences = ts.getSentences(pnct_raw_transcript)
+
+    concat_list_summary = 'empty'
+    if lexrank_switch:
+        # summarize a small part of the text
+        nr_sentences = round(len(dict_sentences)*0.05)
+        print('nr_sentences: '+str(nr_sentences))
+        if nr_sentences == 0:
+            return 'Error: No sentences available', None
+        trunc_pnct_caption = ' '.join(dict_sentences.values())
+        list_summary = lr.getSummary(trunc_pnct_caption,nr_sentences)
+        # a lexrank "sentence" can consist of multiple actual sentences,
+        # separated by full stops; then the corresponding timestamp cannot
+        # be found. All items from the lexrank summary are therefore
+        # concatenated and split up by full stops again.
+        concat_list_summary = '. '.join([str(item) for item in list_summary]).split('. ')
+    else:
+        concat_list_summary = [*dict_sentences.values()]
+
+    dict_timestamp_summary = ts.getTimestampAtFrameFromSummary(pnct_raw_transcript,dict_sentences,concat_list_summary)
+    if 'Error' in dict_timestamp_summary:
+        return dict_timestamp_summary, None
+
+    result_get_video=vd.get_video(link)
+    print('video: '+result_get_video)
+
+    proc_list = fr.extractImagesFromVideo(dict_timestamp_summary.keys())
+    print('frames: '+str(proc_list))
+
+    html_file = ts.convertToHTML(dict_timestamp_summary)
+    images = ts.getImages(dict_timestamp_summary)
+
+    return html_file, images
+
+
+#filename='/Users/hujo/Downloads/Channel_Summaries/wholesaleted.srt.pnct.txt'
+#with open(filename, 'w') as the_file:
+#    the_file.write(raw_caption)
+
+#link ="https://www.youtube.com/watch?v=8uQDDUfGNPA" # blog
+#link = "https://www.youtube.com/watch?v=ofZEo0Rzo5s" # h-educate
+#link = 'https://www.youtube.com/watch?v=ReHGSGwV4-A' #wholesale ted
+#link = 'https://www.youtube.com/watch?v=n8JHnLgodRI' #kevindavid
+#link = 'https://www.youtube.com/watch?v=6MI0f6YjJIk' # Nicholas
+#link = 'https://www.youtube.com/watch?v=bj9snrsSook' #Geldschnurrbart
+#link = 'https://www.youtube.com/watch?v=lCnHfTHkhbE' #fcc tutorial
+#link = 'https://www.youtube.com/watch?v=0kJz0q0pvgQ&feature=youtu.be' # fcc
+
+#lexrank_switch = True
+#rpunkt_switch = False
+#result = getSummary(link, lexrank_switch, rpunkt_switch)
+#print(result)
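+
+# For orientation: with lexrank_switch set, the summary keeps roughly 5% of
+# the detected sentences, e.g. 200 sentences -> round(200*0.05) = 10 summary
+# sentences. Very short transcripts round down to 0 and trigger the
+# 'Error: No sentences available' return above.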
diff --git a/transcript.py b/transcript.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a90bf433f7eae9a116e95a202a392369995f978
--- /dev/null
+++ b/transcript.py
@@ -0,0 +1,201 @@
+from youtube_transcript_api import YouTubeTranscriptApi
+import re
+from PIL import Image
+
+
+#transcript_list = YouTubeTranscriptApi.list_transcripts('ReHGSGwV4-A')
+#transcript = transcript_list.find_transcript(['en','de'])
+
+# step 1: download the json transcript for a youtube video
+def get_json_transcript(link,rpunkt_switch):
+    if "v=" in link:
+        video_id = link.split("v=")[1].split("&")[0]
+    else:
+        return "Error: Invalid Link, it does not have the pattern 'v=' in it."
+
+    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
+    # get the auto-generated english text;
+    # if it is not available, translate to en
+    raw_transcript = 'empty'
+    type_transcript = []
+    if rpunkt_switch:
+        try:
+            transcript = transcript_list.find_generated_transcript(['en'])
+            raw_transcript = transcript.fetch()
+            type_transcript = ['en','generated']
+        except Exception:
+            transcript = transcript_list.find_transcript(['de'])
+            raw_transcript = transcript.translate('en').fetch()
+            type_transcript = ['en','translated']
+    else:
+        transcript = transcript_list.find_transcript(['en','de'])
+        raw_transcript = transcript.fetch()
+        type_transcript = ['den','manual']
+
+    return raw_transcript, type_transcript
+
+# step 2: extract the timestamps from the json transcript
+def get_timestamps(transcript_raw):
+    transcript_timestamps = '\n'.join([str(i['start']) for i in transcript_raw])
+    return transcript_timestamps.split('\n')
+
+# step 3: extract the text from the transcript
+def get_caption(transcript_raw):
+    transcript_text = '\n'.join([i['text'].replace('\n',' ') for i in transcript_raw])
+    return transcript_text
+
+def replacePunctuatedText(raw_transcript, caption):
+    list_caption = caption.split('\n')
+    pnct_raw_transcript = raw_transcript
+
+    for (idx, line) in enumerate(pnct_raw_transcript):
+        line['text']=list_caption[idx]
+
+    return pnct_raw_transcript
+
+def getSentences(raw_transcript):
+    # walk over each frame and extract the frame number and the text;
+    # the frame number is wrapped in hash tag signs
+    frm_cap = ''
+    for (idx, line) in enumerate(raw_transcript, start=1):
+        frm_cap = frm_cap+' #'+str(idx)+'# '+line['text'].replace('\n',' ')
+
+    dict_sentences = {}
+    sentences = frm_cap.strip().split('. ')
+    # small sentences that do not have their own frame are dropped;
+    # sentences shorter than 20 characters are dropped, too.
+    # this is useful so that lexrank does not pick the short sentences
+    for idx,item in enumerate(sentences):
+        m = re.search(r"#[^#]*#", item)
+        if m is not None:
+            match = m.group(0)
+            frm = match.replace('#','')
+            clean_match = re.sub(r'\s*#[^#]*#\s*',' ',item) + '.'
+            if len(clean_match) > 20:
+                dict_sentences[frm] = clean_match.strip()
+
+    return dict_sentences
+
+
+    # split all sentences into an array,
+    # remove all timestamps in the middle of the sentences,
+    # leave only the timestamps at the beginning of each sentence,
+    # restore the full-stop sign at the end of each sentence that was removed in the split step
+    #chops = ''
+    #for item in sl.strip().split('. '):
+    #    chops = chops + re.sub(r'\s*#[^#]*#\s*',' ',item) + '. '
+    #chops
+
+    # remove all remaining hash tags
+    #dsl={}
+    #for item in chops.split('. #'):
+    #    elem= item.split('# ')
+    #    idx = elem[0].replace('#','')
+    #    sentence = elem[1]+'.'
+    #    dsl[idx] = sentence
+
+    #return dsl
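+
+# getSentences in a nutshell (illustrative values): the transcript frames
+#   [{'text': 'welcome to the'}, {'text': 'channel. today we'}]
+# are first joined into ' #1# welcome to the #2# channel. today we'; after
+# splitting on '. ' and stripping the #n# markers this yields
+#   {'1': 'welcome to the channel.'}
+# i.e. each sentence keeps the number of the frame in which it starts.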
+
+def convertToHTML(dsl):
+    workdir = 'file/workdir/'
+    cnt=1
+    html_rows = '<table>'
+    html_rows = html_rows + '<tr><th>Image Nr.</th><th>Timestamp [sec]</th><th>Image</th><th>Caption</th></tr>'
+    for (key,val) in dsl.items():
+        image='frame_'+f"{int(cnt):04d}"+'.jpg'
+        sentence = val
+        row = '<tr>'
+        row = row +'<td>'+str(cnt)+'</td>'
+        row = row +'<td>'+str(key)+'</td>'
+        row = row +'<td><img src="'+workdir+image+'"></td>'
+        row = row +'<td>'+sentence+'</td></tr>\n'
+        html_rows = html_rows + row
+        cnt = cnt+1
+    html_rows = html_rows + '</table>'
+
+    filename='./workdir/output.html'
+    with open(filename, 'w') as the_file:
+        the_file.write(html_rows)
+
+    return html_rows
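+
+# Note: the 'file/workdir/' prefix in the <img> sources assumes Gradio's
+# file route for serving local files, so the HTML output can reference the
+# extracted frames directly from the working directory.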
+
+def getImages(dsl):
+    images = []
+    workdir = 'workdir/'
+    cnt=1
+    for (key,val) in dsl.items():
+        image='frame_'+f"{int(cnt):04d}"+'.jpg'
+        image_path = workdir+image
+        pil_im = Image.open(image_path)
+        images.append(pil_im)
+        cnt=cnt+1
+
+    return images
+
+
+# 1.
+# dict_sentences contains all sentences with their frame-nr;
+# list_summary contains the summarized sentences;
+# the task is to find the corresponding frame-nr for every summarized sentence
+# 2.
+# dict_frame_timestamp contains a mapping of frames to timestamps
+# 3.
+# it is used to construct sum_timestamps, the list of timestamps for each summarized sentence
+def getTimestampAtFrameFromSummary(raw_transcript, dict_sentences,list_summary):
+    dict_summary = {}
+    for key, value in dict_sentences.items():
+        for sentence in list_summary:
+            if str(sentence) in value:
+                dict_summary[key]=value
+
+    # sanity check: were all summarized sentences found?
+    if len(list_summary) != len(dict_summary):
+        err_msg = 'Error: Number of summarized sentences '+str(len(list_summary))+' is not equal to the number of identified sentences '+str(len(dict_summary))+'.'
+        print(err_msg)
+        return err_msg
+
+    dict_frame_timestamp = {}
+    for (idx, line) in enumerate(raw_transcript, start=1):
+        dict_frame_timestamp[str(idx)] = str(line['start'])
+
+    sum_timestamps = []
+    for key in dict_summary.keys():
+        sum_timestamps.append(dict_frame_timestamp.get(key))
+
+    dict_timestamp_summary = {}
+    for (idx,value) in enumerate(list_summary):
+        timestamp = sum_timestamps[idx]
+        dict_timestamp_summary[timestamp] = str(value)
+
+    return dict_timestamp_summary
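+
+# Worked example (illustrative values): if dict_sentences maps frame '7' to
+# 'Install the package first.' and that sentence survives the summary, then
+# dict_summary = {'7': ...}, dict_frame_timestamp['7'] is
+# raw_transcript[6]['start'] (e.g. '42.5'), and the function returns
+# {'42.5': 'Install the package first.'}.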
+
+
+def restore_cr(input_text, output_text):
+    # restore the carriage returns
+    srt_file = input_text
+    punctuated = output_text
+
+    srt_file_strip=srt_file.strip()
+    srt_file_sub=re.sub(r'\s*\n\s*','# ',srt_file_strip)
+    srt_file_array=srt_file_sub.split(' ')
+    pcnt_file_array=punctuated.split(' ')
+
+    # goal: restore the break points, i.e. the same number of lines as in the srt file.
+    # this is necessary because each line in the srt file corresponds to a frame from the video
+    if len(srt_file_array)!=len(pcnt_file_array):
+        return 'AssertError: The length of the transcript ('+str(len(srt_file_array))+') and of the punctuated file ('+str(len(pcnt_file_array))+') should be the same.'
+    pcnt_file_array_hash = []
+    for idx, item in enumerate(srt_file_array):
+        if item.endswith('#'):
+            pcnt_file_array_hash.append(pcnt_file_array[idx]+'#')
+        else:
+            pcnt_file_array_hash.append(pcnt_file_array[idx])
+
+    # assemble the array back into a string
+    pcnt_file_cr=' '.join(pcnt_file_array_hash).replace('#','\n')
+
+    return pcnt_file_cr
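+
+# restore_cr sketch (illustrative): for the two caption lines
+#   'hello world' / 'how are you'
+# the hash-marked token list becomes ['hello', 'world#', 'how', 'are', 'you'];
+# the punctuated words are re-attached position by position, and the '#'
+# markers are finally turned back into the original line breaks.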
diff --git a/workdir/lion.jpg b/workdir/lion.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e9bf9f5d0816d6201b4862088dc74476249a6a70
Binary files /dev/null and b/workdir/lion.jpg differ
diff --git a/ytvideo.py b/ytvideo.py
new file mode 100644
index 0000000000000000000000000000000000000000..2d3e57c11ab07e8bd4b51ebe7a4a4f22c539654a
--- /dev/null
+++ b/ytvideo.py
@@ -0,0 +1,54 @@
+from pytube import YouTube
+import os
+import re
+
+def get_obj_from_link(link):
+    yt = YouTube(link)
+    return yt
+
+
+def get_filename_title(link):
+    if "v=" in link:
+        video_id = link.split("v=")[1]
+    else:
+        return "Error: Invalid Link, it does not have the pattern 'v=' in it."
+
+    yt = get_obj_from_link(link)
+    # catch urllib.error.URLError
+    try:
+        title=yt.title
+    except Exception as e:
+        error_msg = 'Error: Retrieving the video title failed.'
+        print(error_msg,e)
+        return error_msg
+
+    # create a file-system friendly title
+    title_= re.sub(' ', '_', title)
+    filetitle=re.sub('[^0-9a-zA-Z_äüöß-]+','',title_)
+    filetitle_vid =filetitle+'='+video_id
+    return filetitle_vid
+
+def get_video(link):
+    yt = get_obj_from_link(link)
+    #yt = YouTube(link)
+    #filetitle = get_filename_title(yt)
+    filetitle = 'input_video'
+    # download the video
+    working_directory = './workdir/'
+    filetitle_mp4=working_directory+filetitle+'.mp4'
+
+    # create a working directory for the files
+    if not os.path.isdir(working_directory):
+        print('There is no working directory. Create a new one.')
+        os.mkdir(working_directory)
+
+    # catch urllib.error.URLError
+    try:
+        result = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download(filename=filetitle_mp4)
+    except Exception as e:
+        error_msg = 'Error: Retrieving the video failed.'
+        print(error_msg,e)
+        return error_msg
+
+    return result
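+
+# Hypothetical smoke test (requires network; '<video-id>' is a placeholder):
+#   if __name__ == '__main__':
+#       print(get_video('https://www.youtube.com/watch?v=<video-id>'))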