Spaces:

wldmr
/

tubifier

Sleeping

App Files Files Community

wldmr committed on Nov 11, 2022

Commit

837fdb6

•

1 Parent(s): 84ea2c9

app file

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

app.py +44 -48
frames.py +102 -0
lexrank.py +24 -0
myrpunct/__init__.py +2 -0
myrpunct/__pycache__/__init__.cpython-310.pyc +0 -0
myrpunct/__pycache__/__init__.cpython-39.pyc +0 -0
myrpunct/__pycache__/punctuate.cpython-310.pyc +0 -0
myrpunct/__pycache__/punctuate.cpython-39.pyc +0 -0
myrpunct/punctuate.py +174 -0
myrpunct/utils.py +34 -0
pytube/__init__.py +19 -0
pytube/__main__.py +467 -0
pytube/__pycache__/__init__.cpython-310.pyc +0 -0
pytube/__pycache__/__init__.cpython-39.pyc +0 -0
pytube/__pycache__/__main__.cpython-310.pyc +0 -0
pytube/__pycache__/__main__.cpython-39.pyc +0 -0
pytube/__pycache__/captions.cpython-310.pyc +0 -0
pytube/__pycache__/captions.cpython-39.pyc +0 -0
pytube/__pycache__/cipher.cpython-310.pyc +0 -0
pytube/__pycache__/cipher.cpython-39.pyc +0 -0
pytube/__pycache__/exceptions.cpython-310.pyc +0 -0
pytube/__pycache__/exceptions.cpython-39.pyc +0 -0
pytube/__pycache__/extract.cpython-310.pyc +0 -0
pytube/__pycache__/extract.cpython-39.pyc +0 -0
pytube/__pycache__/helpers.cpython-310.pyc +0 -0
pytube/__pycache__/helpers.cpython-39.pyc +0 -0
pytube/__pycache__/innertube.cpython-310.pyc +0 -0
pytube/__pycache__/innertube.cpython-39.pyc +0 -0
pytube/__pycache__/itags.cpython-310.pyc +0 -0
pytube/__pycache__/itags.cpython-39.pyc +0 -0
pytube/__pycache__/metadata.cpython-310.pyc +0 -0
pytube/__pycache__/metadata.cpython-39.pyc +0 -0
pytube/__pycache__/monostate.cpython-310.pyc +0 -0
pytube/__pycache__/monostate.cpython-39.pyc +0 -0
pytube/__pycache__/parser.cpython-310.pyc +0 -0
pytube/__pycache__/parser.cpython-39.pyc +0 -0
pytube/__pycache__/query.cpython-310.pyc +0 -0
pytube/__pycache__/query.cpython-39.pyc +0 -0
pytube/__pycache__/request.cpython-310.pyc +0 -0
pytube/__pycache__/request.cpython-39.pyc +0 -0
pytube/__pycache__/streams.cpython-310.pyc +0 -0
pytube/__pycache__/streams.cpython-39.pyc +0 -0
pytube/__pycache__/version.cpython-310.pyc +0 -0
pytube/__pycache__/version.cpython-39.pyc +0 -0
pytube/captions.py +154 -0
pytube/cipher.py +697 -0
pytube/cli.py +560 -0
pytube/contrib/__init__.py +0 -0
pytube/contrib/__pycache__/__init__.cpython-310.pyc +0 -0
pytube/contrib/__pycache__/__init__.cpython-39.pyc +0 -0

app.py CHANGED Viewed

@@ -1,55 +1,51 @@
-# main.py
-from fastapi import FastAPI
 from PIL import Image
-import base64
-from fastapi.responses import HTMLResponse, FileResponse
-app = FastAPI()
-@app.get("/")
-async def root():
-    return FileResponse(path="static/index.html", media_type="text/html")
-@app.get("/html")
-async def root():
-    """Basic HTML response."""
-    body = (
-        "<html>"
-        "<body style='padding: 10px;'>"
-        "<h1>Welcome to the API</h1>"
-    "<div>"
-        "Check the docs: <a href='/docs'>here</a>"
-        "</div>"
-        "</body>"
-        "</html>"
-    )
-    return HTMLResponse(content=body)
-@app.get("/api")
-async def cal_api():
     images = []
-    with open('workdir/lion.jpg', 'rb') as open_file:
-        byte_content = open_file.read()
-    base64_bytes = base64.b64encode(byte_content)
-    base64_string = base64_bytes.decode('utf-8')
-    images.append(base64_string)
-    with open('workdir/cheetah.jpg', 'rb') as open_file:
-        byte_content = open_file.read()
-    base64_bytes = base64.b64encode(byte_content)
-    base64_string = base64_bytes.decode('utf-8')
-    images.append(base64_string)
-    #image_path='lion.jpg'
-    #pilim = Image.open(image_path)
-    #pilimrot = pilim.rotate(45)
-    return {"data": images}
-@app.get("/items/{item_id}")
-async def read_item(item_id):
-    return {"item_id": item_id}

+import gradio as gr
 from PIL import Image
+import os
+import summarizer as su
+import nltk
+def image_mod(rpunkt_switch, link):
+    """Summarise a video for the Gradio interface.
+
+    Args:
+        rpunkt_switch: Checkbox value; forwarded unchanged to
+            ``su.getSummary``.
+        link: Video URL as typed into the text box.
+
+    Returns:
+        The ``(html, images)`` pair returned by ``su.getSummary``, or
+        ``('Error: No link provided', None)`` when no link was entered.
+    """
+    # Treat None and whitespace-only input like an empty link.
+    if not link or not link.strip():
+        return 'Error: No link provided', None
+    link = link.strip()
+    # Ask NLTK itself whether the punkt tokenizer is installed instead of
+    # probing two hard-coded home directories.
+    try:
+        nltk.data.find('tokenizers/punkt')
+    except LookupError:
+        nltk.download('punkt')
+    lexrank_switch = True
+    html, images = su.getSummary(link, lexrank_switch, rpunkt_switch)
+    print(html)
+    # Debug listing only: a missing work directory must not abort the request.
+    if os.path.isdir('workdir/'):
+        print('local files: ', os.listdir('workdir/'))
+    print('images', images)
+    return html, images
+# Gradio UI: a checkbox plus a text box in, summary HTML plus an image gallery out.
+demo = gr.Interface(
+    fn=image_mod,
+    inputs=[gr.Checkbox(label='Restore punctuation'), "text"],
+    outputs=["html", gr.Gallery()],
+    allow_flagging="never",
+)
+if __name__ == "__main__":
+    demo.launch()

frames.py ADDED Viewed

	@@ -0,0 +1,102 @@

+from ast import Try
+import subprocess as sp
+import os
+# show current venv: echo $VIRTUAL_ENV
+# import sys
+# del sys.modules['frames']
+# transcript module
+# 1. extract timestamps from transcript
+# 2. extract captions from transcript
+# this module
+# 3. extract frames at timestamps
+# 4. add caption to each frame
+# 5. convert images to mp4 video
+# converts the numbered frame images into an mp4 video
+def convertImageToVideo():
+    """Run ffmpeg in ./workdir to turn frame_%04d.jpg into output_video.mp4.
+
+    Everything ffmpeg writes to stderr is captured.
+
+    Returns:
+        A two-element list ``[exit_code, stderr_bytes]``.
+    """
+    ffmpeg_args = ['ffmpeg', '-y', '-f', 'image2', '-i', 'frame_%04d.jpg', 'output_video.mp4']
+    finished = sp.run(ffmpeg_args, cwd='./workdir', stderr=sp.PIPE)
+    return [finished.returncode, finished.stderr]
+# extract a frame as jpg image file
+# from a video at a given timestamp
+# num=0; for p in $(cat timestamps); do ((num++)); printf "$num $p\r"; dnum=$(printf "%03d" "$num"); ffmpeg -ss $p -i "$mp4file" -frames:v 1 out_$dnum.jpg >& ffmpeg.out; done
+def extractImagesFromVideo(timestamps):
+    """Grab one JPEG frame from the input video for every timestamp.
+
+    Frame ``n`` (1-based, in the order of ``timestamps``) is written to
+    ``./workdir/frame_000n.jpg``.
+
+    Args:
+        timestamps: Iterable of seek positions; each one is passed to
+            ffmpeg's ``-ss`` option after conversion with ``str()``.
+
+    Returns:
+        A list with one ffmpeg exit code per timestamp, or an error string
+        when ``./workdir/input_video.mp4`` does not exist.
+    """
+    working_dir = './workdir'
+    input_file = 'input_video.mp4'
+    # Create the working directory before looking for the video; checking the
+    # file first made the directory creation unreachable.
+    if not os.path.isdir(working_dir):
+        print('There is no working directory. Create a new one.')
+        os.mkdir(working_dir)
+    if not os.path.isfile(os.path.join(working_dir, input_file)):
+        return 'Error: File '+input_file+' is missing, create the file first.'
+    proc_list = []
+    for current_frame, current_timestamp in enumerate(timestamps, start=1):
+        frame_name = f"frame_{current_frame:04d}.jpg"
+        print(f"{current_frame:04d}", current_timestamp)
+        # Build the argument list directly so no value is ever re-split.
+        cmd_call = ['ffmpeg', '-y', '-ss', str(current_timestamp),
+                    '-i', input_file, '-frames:v', '1', frame_name]
+        # ffmpeg logs heavily to stderr; discard it rather than leaving an
+        # unread pipe that can fill up and block the child while we wait.
+        completed = sp.run(cmd_call, cwd=working_dir, stderr=sp.DEVNULL)
+        proc_list.append(completed.returncode)
+    return proc_list
+# add caption to each image
+# the 'convert' program is from the 'imagemagick' package
+# num=0; while read p; do ((num++)); dnum=$(printf "%03d" "$num"); printf "$dnum $p\r"; convert out_$dnum.jpg -undercolor Black -fill white -gravity South -pointsize 25 -annotate +0+10     "$p" out_$dnum.jpg >& ffmpeg.out; done<srt.txt
+def addCaptionToImage(caption):
+    """Draw one caption line onto each extracted frame with ImageMagick.
+
+    Line ``n`` (1-based) of ``caption`` is annotated onto
+    ``./workdir/frame_000n.jpg``; the file is overwritten in place.
+
+    Args:
+        caption: Newline-separated captions, one per frame.
+
+    Returns:
+        A list with one ``convert`` exit code per caption line.
+    """
+    working_dir = './workdir'
+    proc_list = []
+    for current_frame, current_caption in enumerate(caption.split('\n'), start=1):
+        print(f"{current_frame:04d}", current_caption)
+        frame_name = f"frame_{current_frame:04d}.jpg"
+        # The caption is a single argv entry, so spaces inside it are never
+        # split into separate arguments.
+        cmd_call = ['convert', frame_name, '-undercolor', 'Black', '-fill', 'white',
+                    '-gravity', 'South', '-pointsize', '25', '-annotate', '+0+10',
+                    current_caption, frame_name]
+        # Discard stderr instead of piping it: an unread pipe can fill up and
+        # block the child while we wait for it.
+        completed = sp.run(cmd_call, cwd=working_dir, stderr=sp.DEVNULL)
+        proc_list.append(completed.returncode)
+    return proc_list
+def removeFilesInWorkdir():
+    """Delete every entry directly inside ./workdir.
+
+    Returns:
+        An empty string on success, otherwise an error message.
+    """
+    working_dir = './workdir'
+    try:
+        for f in os.listdir(working_dir):
+            os.remove(os.path.join(working_dir, f))
+    except OSError:
+        # Missing directory, permission problems, sub-directories, ...
+        # Only filesystem errors are reported; KeyboardInterrupt and
+        # programming errors are no longer swallowed.
+        return 'Error: Not all files could be removed.'
+    return ''
+def renameOutputVideo(filenme):
+    """Move ./workdir/output_video.mp4 to ./shelf/<filenme>.mp4.
+
+    Args:
+        filenme: Target file name without the ``.mp4`` extension.
+
+    Returns:
+        An empty string on success, otherwise an error message.
+    """
+    shelf_dir = './shelf'
+    input_filename = os.path.join('./workdir', 'output_video.mp4')
+    output_filename = os.path.join(shelf_dir, filenme + '.mp4')
+    try:
+        # Create the shelf on first use so the move cannot fail merely
+        # because the target directory is missing.
+        os.makedirs(shelf_dir, exist_ok=True)
+        os.rename(input_filename, output_filename)
+    except OSError:
+        # Only filesystem errors are reported; other exceptions propagate.
+        return 'Error: Could not rename file.'
+    return ''

lexrank.py ADDED Viewed

	@@ -0,0 +1,24 @@

+#import nltk
+#nltk.download('punkt')
+from sumy.parsers.html import HtmlParser
+from sumy.parsers.plaintext import PlaintextParser
+from sumy.nlp.tokenizers import Tokenizer
+from sumy.summarizers.lex_rank import LexRankSummarizer
+from sumy.nlp.stemmers import Stemmer
+from sumy.utils import get_stop_words
+def getSummary(text, nr_sentences):
+    """Summarise ``text`` with sumy's LexRank summarizer.
+
+    Args:
+        text: Plain text to summarise; tokenized as English.
+        nr_sentences: Sentence count handed to the summarizer.
+
+    Returns:
+        A list of the sentence objects yielded by the summarizer.
+    """
+    language = "english"
+    document = PlaintextParser.from_string(text, Tokenizer(language)).document
+    ranker = LexRankSummarizer(Stemmer(language))
+    ranker.stop_words = get_stop_words(language)
+    return list(ranker(document, nr_sentences))

myrpunct/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ from .punctuate import RestorePuncts
2	+ print("init executed ...")

myrpunct/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (231 Bytes). View file

myrpunct/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (227 Bytes). View file

myrpunct/__pycache__/punctuate.cpython-310.pyc ADDED Viewed

Binary file (5.71 kB). View file

myrpunct/__pycache__/punctuate.cpython-39.pyc ADDED Viewed

Binary file (5.69 kB). View file

myrpunct/punctuate.py ADDED Viewed

	@@ -0,0 +1,174 @@

+# -*- coding: utf-8 -*-
+# 💾⚙️🔮
+__author__ = "Daulet N."
+__email__ = "daulet.nurmanbetov@gmail.com"
+import logging
+from langdetect import detect
+from simpletransformers.ner import NERModel, NERArgs
+class RestorePuncts:
+    """Restores punctuation and capitalisation using a BERT token-classification model.
+
+    Text is cut into overlapping word slices, each slice is labelled by the
+    model, and the per-word labels are merged back and applied to the words.
+    """
+    def __init__(self, wrds_per_pred=250, use_cuda=False):
+        """
+        Loads the punctuation model.
+        Args:
+            - wrds_per_pred (int): Number of words per slice (before overlap) sent to the model.
+            - use_cuda (bool): Forwarded to `NERModel`.
+        """
+        self.wrds_per_pred = wrds_per_pred
+        # extra words appended to every slice so that neighbouring slices overlap
+        self.overlap_wrds = 30
+        # label layout (see punctuate_texts): first char is the punctuation mark to
+        # append ('O' = none), last char is 'U' to capitalise the word or 'O' to leave it
+        self.valid_labels = ['OU', 'OO', '.O', '!O', ',O', '.U', '!U', ',U', ':O', ';O', ':U', "'O", '-O', '?O', '?U']
+        self.model_hf = "wldmr/felflare-bert-restore-punctuation"
+        self.model_args = NERArgs()
+        self.model_args.silent = True
+        self.model_args.max_seq_length = 512
+        #self.model_args.use_multiprocessing = False
+        self.model = NERModel("bert", self.model_hf, labels=self.valid_labels, use_cuda=use_cuda, args=self.model_args)
+        #self.model = NERModel("bert", self.model_hf, labels=self.valid_labels, use_cuda=use_cuda, args={"silent": True, "max_seq_length": 512, "use_multiprocessing": False})
+        print("class init ...")
+        print("use_multiprocessing: ",self.model_args.use_multiprocessing)
+    def status(self):
+        """Prints a fixed message; does nothing else."""
+        print("function called")
+    def punctuate(self, text: str, lang:str=''):
+        """
+        Performs punctuation restoration on arbitrarily large text.
+        Detects if input is not English, if non-English was detected terminates predictions.
+        Override by supplying `lang='en'`
+        Args:
+            - text (str): Text to punctuate, can be few words to as large as you want.
+            - lang (str): Explicit language of input text.
+        Returns:
+            The punctuated text as a single string.
+        Raises:
+            Exception: if `lang` (given or detected) is not 'en'.
+        """
+        # NOTE(review): for text of 10 characters or fewer with no explicit `lang`,
+        # detection is skipped, `lang` stays '' and the check below raises.
+        if not lang and len(text) > 10:
+            lang = detect(text)
+        if lang != 'en':
+            raise Exception(F"""Non English text detected. Restore Punctuation works only for English.
+            If you are certain the input is English, pass argument lang='en' to this function.
+            Punctuate received: {text}""")
+        # split up large text into bert digestable chunks
+        splits = self.split_on_toks(text, self.wrds_per_pred, self.overlap_wrds)
+        # predict slices
+        # full_preds_lst contains tuple of labels and logits
+        full_preds_lst = [self.predict(i['text']) for i in splits]
+        # extract predictions, and discard logits
+        preds_lst = [i[0][0] for i in full_preds_lst]
+        # join text slices
+        combined_preds = self.combine_results(text, preds_lst)
+        # create punctuated prediction
+        punct_text = self.punctuate_texts(combined_preds)
+        return punct_text
+    def predict(self, input_slice):
+        """
+        Passes the unpunctuated text to the model for punctuation.
+        Returns the `(predictions, raw_outputs)` pair from `self.model.predict`.
+        """
+        predictions, raw_outputs = self.model.predict([input_slice])
+        return predictions, raw_outputs
+    @staticmethod
+    def split_on_toks(text, length, overlap):
+        """
+        Splits text into predefined slices of overlapping text with indexes (offsets)
+        that tie-back to original text.
+        This is done to bypass 512 token limit on transformer models by sequentially
+        feeding chunks of < 512 toks.
+        Example output:
+        [{...}, {"text": "...", 'start_idx': 31354, 'end_idx': 32648}, {...}]
+        """
+        wrds = text.replace('\n', ' ').split(" ")
+        resp = []
+        # character offset at which the current slice starts
+        lst_chunk_idx = 0
+        i = 0
+        while True:
+            # words in the chunk and the overlapping portion
+            wrds_len = wrds[(length * i):(length * (i + 1))]
+            wrds_ovlp = wrds[(length * (i + 1)):((length * (i + 1)) + overlap)]
+            wrds_split = wrds_len + wrds_ovlp
+            # Break loop if no more words
+            if not wrds_split:
+                break
+            wrds_str = " ".join(wrds_split)
+            # character length of the non-overlapping part / of the whole slice
+            nxt_chunk_start_idx = len(" ".join(wrds_len))
+            lst_char_idx = len(" ".join(wrds_split))
+            resp_obj = {
+                "text": wrds_str,
+                "start_idx": lst_chunk_idx,
+                "end_idx": lst_char_idx + lst_chunk_idx,
+            }
+            resp.append(resp_obj)
+            # +1 accounts for the space that separated this chunk from the next
+            lst_chunk_idx += nxt_chunk_start_idx + 1
+            i += 1
+        logging.info(f"Sliced transcript into {len(resp)} slices.")
+        return resp
+    @staticmethod
+    def combine_results(full_text: str, text_slices):
+        """
+        Given a full text and predictions of each slice combines predictions into a single text again.
+        Performs validation whether text was combined correctly
+        Returns a list of (word, label) tuples, one per word of the full text.
+        """
+        split_full_text = full_text.replace('\n', ' ').split(" ")
+        split_full_text = [i for i in split_full_text if i]
+        split_full_text_len = len(split_full_text)
+        output_text = []
+        # position of the next word of the full text that still needs a label
+        index = 0
+        # drop a final slice of at most 3 predictions unless it is the only slice
+        if len(text_slices[-1]) <= 3 and len(text_slices) > 1:
+            text_slices = text_slices[:-1]
+        for _slice in text_slices:
+            slice_wrds = len(_slice)
+            for ix, wrd in enumerate(_slice):
+                # print(index, "|", str(list(wrd.keys())[0]), "|", split_full_text[index])
+                if index == split_full_text_len:
+                    break
+                # non-final slice: accept a matching word only while ix <= slice_wrds - 3,
+                # i.e. the last two predictions of the slice are never taken
+                # NOTE(review): `text_slices[-1] != _slice` compares by value, not identity
+                if split_full_text[index] == str(list(wrd.keys())[0]) and \
+                        ix <= slice_wrds - 3 and text_slices[-1] != _slice:
+                    index += 1
+                    pred_item_tuple = list(wrd.items())[0]
+                    output_text.append(pred_item_tuple)
+                # final slice: accept every matching word
+                elif split_full_text[index] == str(list(wrd.keys())[0]) and text_slices[-1] == _slice:
+                    index += 1
+                    pred_item_tuple = list(wrd.items())[0]
+                    output_text.append(pred_item_tuple)
+        # validation: every word of the input must have been matched, in order
+        assert [i[0] for i in output_text] == split_full_text
+        return output_text
+    @staticmethod
+    def punctuate_texts(full_pred: list):
+        """
+        Given a list of Predictions from the model, applies the predictions to text,
+        thus punctuating it.
+        """
+        punct_resp = ""
+        for i in full_pred:
+            word, label = i
+            # last label character 'U' -> capitalise the word
+            if label[-1] == "U":
+                punct_wrd = word.capitalize()
+            else:
+                punct_wrd = word
+            # first label character is the punctuation mark to append ('O' = none)
+            if label[0] != "O":
+                punct_wrd += label[0]
+            punct_resp += punct_wrd + " "
+        punct_resp = punct_resp.strip()
+        # Append trailing period if it doesn't exist.
+        # NOTE(review): an empty `full_pred` leaves punct_resp == "" and the index below raises IndexError
+        if punct_resp[-1].isalnum():
+            punct_resp += "."
+        return punct_resp
+if __name__ == "__main__":
+    # Manual smoke test: punctuate the sample text file and print the result.
+    punct_model = RestorePuncts()
+    # Read explicitly as UTF-8 so decoding does not depend on the platform default.
+    with open('../tests/sample_text.txt', 'r', encoding='utf-8') as fp:
+        test_sample = fp.read()
+    # predict text and print
+    punctuated = punct_model.punctuate(test_sample)
+    print(punctuated)

myrpunct/utils.py ADDED Viewed

	@@ -0,0 +1,34 @@

+# -*- coding: utf-8 -*-
+# 💾⚙️🔮
+__author__ = "Daulet N."
+__email__ = "daulet.nurmanbetov@gmail.com"
+def prepare_unpunct_text(text):
+    """
+    Given a text, normalizes it to subsequently restore punctuation:
+    lower-cases it, strips the punctuation around every word and joins the
+    remaining words with single spaces.
+    """
+    # Split on any whitespace. Deleting newlines outright (the previous
+    # behaviour) glued the last word of a line to the first word of the next.
+    stripped_wrds = (strip_punct(wrd) for wrd in text.lower().split())
+    # Words that consisted only of punctuation are now empty; drop them.
+    return " ".join(wrd for wrd in stripped_wrds if wrd)
+def strip_punct(wrd):
+    """
+    Given a word, strips non-alphanumeric characters that precede and follow it
+    """
+    if not wrd:
+        return wrd
+    # Narrow a [start, stop) window instead of re-slicing the word each step.
+    start, stop = 0, len(wrd)
+    while stop > start and not wrd[stop - 1].isalnum():
+        stop -= 1
+    while start < stop and not wrd[start].isalnum():
+        start += 1
+    return wrd[start:stop]

pytube/__init__.py ADDED Viewed

	@@ -0,0 +1,19 @@

+# flake8: noqa: F401
+# noreorder
+"""
+Pytube: a very serious Python library for downloading YouTube Videos.
+"""
+__title__ = "pytube"
+__author__ = "Ronnie Ghose, Taylor Fox Dahlin, Nick Ficano"
+__license__ = "The Unlicense (Unlicense)"
+__js__ = None
+__js_url__ = None
+from pytube.version import __version__
+from pytube.streams import Stream
+from pytube.captions import Caption
+from pytube.query import CaptionQuery, StreamQuery
+from pytube.__main__ import YouTube
+from pytube.contrib.playlist import Playlist
+from pytube.contrib.channel import Channel
+from pytube.contrib.search import Search

pytube/__main__.py ADDED Viewed

	@@ -0,0 +1,467 @@

+"""
+This module implements the core developer interface for pytube.
+The problem domain of the :class:`YouTube <YouTube>` class focuses almost
+exclusively on the developer interface. Pytube offloads the heavy lifting to
+smaller peripheral modules and functions.
+"""
+import logging
+from typing import Any, Callable, Dict, List, Optional
+import pytube
+import pytube.exceptions as exceptions
+from pytube import extract, request
+from pytube import Stream, StreamQuery
+from pytube.helpers import install_proxy
+from pytube.innertube import InnerTube
+from pytube.metadata import YouTubeMetadata
+from pytube.monostate import Monostate
+logger = logging.getLogger(__name__)
+class YouTube:
+    """Core developer interface for pytube."""
+    def __init__(
+        self,
+        url: str,
+        on_progress_callback: Optional[Callable[[Any, bytes, int], None]] = None,
+        on_complete_callback: Optional[Callable[[Any, Optional[str]], None]] = None,
+        proxies: Dict[str, str] = None,
+        use_oauth: bool = False,
+        allow_oauth_cache: bool = True
+    ):
+        """Construct a :class:`YouTube <YouTube>`.
+        :param str url:
+            A valid YouTube watch URL.
+        :param func on_progress_callback:
+            (Optional) User defined callback function for stream download
+            progress events.
+        :param func on_complete_callback:
+            (Optional) User defined callback function for stream download
+            complete events.
+        :param dict proxies:
+            (Optional) A dict mapping protocol to proxy address which will be used by pytube.
+        :param bool use_oauth:
+            (Optional) Prompt the user to authenticate to YouTube.
+            If allow_oauth_cache is set to True, the user should only be prompted once.
+        :param bool allow_oauth_cache:
+            (Optional) Cache OAuth tokens locally on the machine. Defaults to True.
+            These tokens are only generated if use_oauth is set to True as well.
+        """
+        self._js: Optional[str] = None  # js fetched by js_url
+        self._js_url: Optional[str] = None  # the url to the js, parsed from watch html
+        self._vid_info: Optional[Dict] = None  # content fetched from innertube/player
+        self._watch_html: Optional[str] = None  # the html of /watch?v=<video_id>
+        self._embed_html: Optional[str] = None
+        self._player_config_args: Optional[Dict] = None  # inline js in the html containing
+        self._age_restricted: Optional[bool] = None
+        self._fmt_streams: Optional[List[Stream]] = None
+        self._initial_data = None
+        self._metadata: Optional[YouTubeMetadata] = None
+        # video_id part of /watch?v=<video_id>
+        self.video_id = extract.video_id(url)
+        self.watch_url = f"https://youtube.com/watch?v={self.video_id}"
+        self.embed_url = f"https://www.youtube.com/embed/{self.video_id}"
+        # Shared between all instances of `Stream` (Borg pattern).
+        self.stream_monostate = Monostate(
+            on_progress=on_progress_callback, on_complete=on_complete_callback
+        )
+        if proxies:
+            install_proxy(proxies)
+        self._author = None
+        self._title = None
+        self._publish_date = None
+        self.use_oauth = use_oauth
+        self.allow_oauth_cache = allow_oauth_cache
+    def __repr__(self):
+        return f'<pytube.__main__.YouTube object: videoId={self.video_id}>'
+    def __eq__(self, o: object) -> bool:
+        # Compare types and urls, if they're same return true, else return false.
+        return type(o) == type(self) and o.watch_url == self.watch_url
+    @property
+    def watch_html(self):
+        if self._watch_html:
+            return self._watch_html
+        self._watch_html = request.get(url=self.watch_url)
+        return self._watch_html
+    @property
+    def embed_html(self):
+        if self._embed_html:
+            return self._embed_html
+        self._embed_html = request.get(url=self.embed_url)
+        return self._embed_html
+    @property
+    def age_restricted(self):
+        if self._age_restricted:
+            return self._age_restricted
+        self._age_restricted = extract.is_age_restricted(self.watch_html)
+        return self._age_restricted
+    @property
+    def js_url(self):
+        if self._js_url:
+            return self._js_url
+        if self.age_restricted:
+            self._js_url = extract.js_url(self.embed_html)
+        else:
+            self._js_url = extract.js_url(self.watch_html)
+        return self._js_url
+    @property
+    def js(self):
+        if self._js:
+            return self._js
+        # If the js_url doesn't match the cached url, fetch the new js and update
+        #  the cache; otherwise, load the cache.
+        if pytube.__js_url__ != self.js_url:
+            self._js = request.get(self.js_url)
+            pytube.__js__ = self._js
+            pytube.__js_url__ = self.js_url
+        else:
+            self._js = pytube.__js__
+        return self._js
+    @property
+    def initial_data(self):
+        if self._initial_data:
+            return self._initial_data
+        self._initial_data = extract.initial_data(self.watch_html)
+        return self._initial_data
+    @property
+    def streaming_data(self):
+        """Return streamingData from video info."""
+        if 'streamingData' in self.vid_info:
+            return self.vid_info['streamingData']
+        else:
+            self.bypass_age_gate()
+            return self.vid_info['streamingData']
+    @property
+    def fmt_streams(self):
+        """Returns a list of streams if they have been initialized.
+        If the streams have not been initialized, finds all relevant
+        streams and initializes them.
+        """
+        self.check_availability()
+        if self._fmt_streams:
+            return self._fmt_streams
+        self._fmt_streams = []
+        stream_manifest = extract.apply_descrambler(self.streaming_data)
+        # If the cached js doesn't work, try fetching a new js file
+        # https://github.com/pytube/pytube/issues/1054
+        try:
+            extract.apply_signature(stream_manifest, self.vid_info, self.js)
+        except exceptions.ExtractError:
+            # To force an update to the js file, we clear the cache and retry
+            self._js = None
+            self._js_url = None
+            pytube.__js__ = None
+            pytube.__js_url__ = None
+            extract.apply_signature(stream_manifest, self.vid_info, self.js)
+        # build instances of :class:`Stream <Stream>`
+        # Initialize stream objects
+        for stream in stream_manifest:
+            video = Stream(
+                stream=stream,
+                monostate=self.stream_monostate,
+            )
+            self._fmt_streams.append(video)
+        self.stream_monostate.title = self.title
+        self.stream_monostate.duration = self.length
+        return self._fmt_streams
+    def check_availability(self):
+        """Check whether the video is available.
+        Raises different exceptions based on why the video is unavailable,
+        otherwise does nothing.
+        """
+        status, messages = extract.playability_status(self.watch_html)
+        for reason in messages:
+            if status == 'UNPLAYABLE':
+                if reason == (
+                    'Join this channel to get access to members-only content '
+                    'like this video, and other exclusive perks.'
+                ):
+                    raise exceptions.MembersOnly(video_id=self.video_id)
+                elif reason == 'This live stream recording is not available.':
+                    raise exceptions.RecordingUnavailable(video_id=self.video_id)
+                else:
+                    raise exceptions.VideoUnavailable(video_id=self.video_id)
+            elif status == 'LOGIN_REQUIRED':
+                if reason == (
+                    'This is a private video. '
+                    'Please sign in to verify that you may see it.'
+                ):
+                    raise exceptions.VideoPrivate(video_id=self.video_id)
+            elif status == 'ERROR':
+                if reason == 'Video unavailable':
+                    raise exceptions.VideoUnavailable(video_id=self.video_id)
+            elif status == 'LIVE_STREAM':
+                raise exceptions.LiveStreamError(video_id=self.video_id)
+    @property
+    def vid_info(self):
+        """Parse the raw vid info and return the parsed result.
+        :rtype: Dict[Any, Any]
+        """
+        if self._vid_info:
+            return self._vid_info
+        innertube = InnerTube(use_oauth=self.use_oauth, allow_cache=self.allow_oauth_cache)
+        innertube_response = innertube.player(self.video_id)
+        self._vid_info = innertube_response
+        return self._vid_info
+    def bypass_age_gate(self):
+        """Attempt to update the vid_info by bypassing the age gate."""
+        innertube = InnerTube(
+            client='ANDROID_EMBED',
+            use_oauth=self.use_oauth,
+            allow_cache=self.allow_oauth_cache
+        )
+        innertube_response = innertube.player(self.video_id)
+        playability_status = innertube_response['playabilityStatus'].get('status', None)
+        # If we still can't access the video, raise an exception
+        # (tier 3 age restriction)
+        if playability_status == 'UNPLAYABLE':
+            raise exceptions.AgeRestrictedError(self.video_id)
+        self._vid_info = innertube_response
+    @property
+    def caption_tracks(self) -> List[pytube.Caption]:
+        """Get a list of :class:`Caption <Caption>`.
+        :rtype: List[Caption]
+        """
+        raw_tracks = (
+            self.vid_info.get("captions", {})
+            .get("playerCaptionsTracklistRenderer", {})
+            .get("captionTracks", [])
+        )
+        return [pytube.Caption(track) for track in raw_tracks]
+    @property
+    def captions(self) -> pytube.CaptionQuery:
+        """Interface to query caption tracks.
+        :rtype: :class:`CaptionQuery <CaptionQuery>`.
+        """
+        return pytube.CaptionQuery(self.caption_tracks)
+    @property
+    def streams(self) -> StreamQuery:
+        """Interface to query both adaptive (DASH) and progressive streams.
+        :rtype: :class:`StreamQuery <StreamQuery>`.
+        """
+        self.check_availability()
+        return StreamQuery(self.fmt_streams)
+    @property
+    def thumbnail_url(self) -> str:
+        """Get the thumbnail url image.
+        :rtype: str
+        """
+        thumbnail_details = (
+            self.vid_info.get("videoDetails", {})
+            .get("thumbnail", {})
+            .get("thumbnails")
+        )
+        if thumbnail_details:
+            thumbnail_details = thumbnail_details[-1]  # last item has max size
+            return thumbnail_details["url"]
+        return f"https://img.youtube.com/vi/{self.video_id}/maxresdefault.jpg"
+    @property
+    def publish_date(self):
+        """Get the publish date.
+        :rtype: datetime
+        """
+        if self._publish_date:
+            return self._publish_date
+        self._publish_date = extract.publish_date(self.watch_html)
+        return self._publish_date
+    @publish_date.setter
+    def publish_date(self, value):
+        """Sets the publish date."""
+        self._publish_date = value
+    @property
+    def title(self) -> str:
+        """Get the video title.
+        :rtype: str
+        """
+        if self._title:
+            return self._title
+        try:
+            self._title = self.vid_info['videoDetails']['title']
+        except KeyError:
+            # Check_availability will raise the correct exception in most cases
+            #  if it doesn't, ask for a report.
+            self.check_availability()
+            raise exceptions.PytubeError(
+                (
+                    f'Exception while accessing title of {self.watch_url}. '
+                    'Please file a bug report at https://github.com/pytube/pytube'
+                )
+            )
+        return self._title
+    @title.setter
+    def title(self, value):
+        """Sets the title value."""
+        self._title = value
+    @property
+    def description(self) -> str:
+        """Get the video description.
+        :rtype: str
+        """
+        return self.vid_info.get("videoDetails", {}).get("shortDescription")
+    @property
+    def rating(self) -> float:
+        """Get the video average rating.
+        :rtype: float
+        """
+        return self.vid_info.get("videoDetails", {}).get("averageRating")
+    @property
+    def length(self) -> int:
+        """Get the video length in seconds.
+        :rtype: int
+        """
+        return int(self.vid_info.get('videoDetails', {}).get('lengthSeconds'))
+    @property
+    def views(self) -> int:
+        """Get the number of the times the video has been viewed.
+        :rtype: int
+        """
+        return int(self.vid_info.get("videoDetails", {}).get("viewCount"))
+    @property
+    def author(self) -> str:
+        """Get the video author.
+        :rtype: str
+        """
+        if self._author:
+            return self._author
+        self._author = self.vid_info.get("videoDetails", {}).get(
+            "author", "unknown"
+        )
+        return self._author
+    @author.setter
+    def author(self, value):
+        """Set the video author."""
+        self._author = value
+    @property
+    def keywords(self) -> List[str]:
+        """Get the video keywords.
+        :rtype: List[str]
+        """
+        return self.vid_info.get('videoDetails', {}).get('keywords', [])
+    @property
+    def channel_id(self) -> str:
+        """Get the video poster's channel id.
+        :rtype: str
+        """
+        return self.vid_info.get('videoDetails', {}).get('channelId', None)
+    @property
+    def channel_url(self) -> str:
+        """Construct the channel url for the video's poster from the channel id.
+        :rtype: str
+        """
+        return f'https://www.youtube.com/channel/{self.channel_id}'
+    @property
+    def metadata(self) -> Optional[YouTubeMetadata]:
+        """Get the metadata for the video.
+        :rtype: YouTubeMetadata
+        """
+        if self._metadata:
+            return self._metadata
+        else:
+            self._metadata = extract.metadata(self.initial_data)
+            return self._metadata
+    def register_on_progress_callback(self, func: Callable[[Any, bytes, int], None]) -> None:
+        """Register a download progress callback function post initialization.
+        The callback is stored as ``on_progress`` on ``stream_monostate``,
+        replacing any callback stored there before.
+        :param callable func:
+            A callback function that takes ``stream``, ``chunk``,
+             and ``bytes_remaining`` as parameters.
+        :rtype: None
+        """
+        self.stream_monostate.on_progress = func
+    def register_on_complete_callback(self, func: Callable[[Any, Optional[str]], None]) -> None:
+        """Register a download complete callback function post initialization.
+        The callback is stored as ``on_complete`` on ``stream_monostate``,
+        replacing any callback stored there before.
+        :param callable func:
+            A callback function that takes ``stream`` and  ``file_path``.
+        :rtype: None
+        """
+        self.stream_monostate.on_complete = func

pytube/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (830 Bytes). View file

pytube/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (819 Bytes). View file

pytube/__pycache__/__main__.cpython-310.pyc ADDED Viewed

Binary file (12.6 kB). View file

pytube/__pycache__/__main__.cpython-39.pyc ADDED Viewed

Binary file (12.8 kB). View file

pytube/__pycache__/captions.cpython-310.pyc ADDED Viewed

Binary file (4.95 kB). View file

pytube/__pycache__/captions.cpython-39.pyc ADDED Viewed

Binary file (4.92 kB). View file

pytube/__pycache__/cipher.cpython-310.pyc ADDED Viewed

Binary file (18.9 kB). View file

pytube/__pycache__/cipher.cpython-39.pyc ADDED Viewed

Binary file (18.9 kB). View file

pytube/__pycache__/exceptions.cpython-310.pyc ADDED Viewed

Binary file (5.01 kB). View file

pytube/__pycache__/exceptions.cpython-39.pyc ADDED Viewed

Binary file (5.55 kB). View file

pytube/__pycache__/extract.cpython-310.pyc ADDED Viewed

Binary file (15.4 kB). View file

pytube/__pycache__/extract.cpython-39.pyc ADDED Viewed

Binary file (15.5 kB). View file

pytube/__pycache__/helpers.cpython-310.pyc ADDED Viewed

Binary file (9.83 kB). View file

pytube/__pycache__/helpers.cpython-39.pyc ADDED Viewed

Binary file (9.84 kB). View file

pytube/__pycache__/innertube.cpython-310.pyc ADDED Viewed

Binary file (8.63 kB). View file

pytube/__pycache__/innertube.cpython-39.pyc ADDED Viewed

Binary file (8.63 kB). View file

pytube/__pycache__/itags.cpython-310.pyc ADDED Viewed

Binary file (2.78 kB). View file

pytube/__pycache__/itags.cpython-39.pyc ADDED Viewed

Binary file (2.26 kB). View file

pytube/__pycache__/metadata.cpython-310.pyc ADDED Viewed

Binary file (1.71 kB). View file

pytube/__pycache__/metadata.cpython-39.pyc ADDED Viewed

Binary file (1.71 kB). View file

pytube/__pycache__/monostate.cpython-310.pyc ADDED Viewed

Binary file (728 Bytes). View file

pytube/__pycache__/monostate.cpython-39.pyc ADDED Viewed

Binary file (695 Bytes). View file

pytube/__pycache__/parser.cpython-310.pyc ADDED Viewed

Binary file (3.94 kB). View file

pytube/__pycache__/parser.cpython-39.pyc ADDED Viewed

Binary file (3.93 kB). View file

pytube/__pycache__/query.cpython-310.pyc ADDED Viewed

Binary file (14.1 kB). View file

pytube/__pycache__/query.cpython-39.pyc ADDED Viewed

Binary file (14.3 kB). View file

pytube/__pycache__/request.cpython-310.pyc ADDED Viewed

Binary file (5.74 kB). View file

pytube/__pycache__/request.cpython-39.pyc ADDED Viewed

Binary file (5.69 kB). View file

pytube/__pycache__/streams.cpython-310.pyc ADDED Viewed

Binary file (10.9 kB). View file

pytube/__pycache__/streams.cpython-39.pyc ADDED Viewed

Binary file (10.8 kB). View file

pytube/__pycache__/version.cpython-310.pyc ADDED Viewed

Binary file (220 Bytes). View file

pytube/__pycache__/version.cpython-39.pyc ADDED Viewed

Binary file (214 Bytes). View file

pytube/captions.py ADDED Viewed

	@@ -0,0 +1,154 @@

+import math
+import os
+import time
+import xml.etree.ElementTree as ElementTree
+from html import unescape
+from typing import Dict, Optional
+from pytube import request
+from pytube.helpers import safe_filename, target_directory
+class Caption:
+    """Container for caption tracks."""
+    def __init__(self, caption_track: Dict):
+        """Construct a :class:`Caption <Caption>`.
+        :param dict caption_track:
+            Caption track data extracted from ``watch_html``.
+        """
+        self.url = caption_track.get("baseUrl")
+        # Certain videos have runs instead of simpleText
+        #  this handles that edge case
+        name_dict = caption_track['name']
+        if 'simpleText' in name_dict:
+            self.name = name_dict['simpleText']
+        else:
+            for el in name_dict['runs']:
+                if 'text' in el:
+                    self.name = el['text']
+        # Use "vssId" instead of "languageCode", fix issue #779
+        self.code = caption_track["vssId"]
+        # Remove preceding '.' for backwards compatibility, e.g.:
+        # English -> vssId: .en, languageCode: en
+        # English (auto-generated) -> vssId: a.en, languageCode: en
+        self.code = self.code.strip('.')
+    @property
+    def xml_captions(self) -> str:
+        """Download the xml caption tracks."""
+        return request.get(self.url)
+    def generate_srt_captions(self) -> str:
+        """Generate "SubRip Subtitle" captions.
+        Takes the xml captions from :meth:`~pytube.Caption.xml_captions` and
+        recompiles them into the "SubRip Subtitle" format.
+        """
+        return self.xml_caption_to_srt(self.xml_captions)
+    @staticmethod
+    def float_to_srt_time_format(d: float) -> str:
+        """Convert decimal durations into proper srt format.
+        :rtype: str
+        :returns:
+            SubRip Subtitle (str) formatted time duration.
+        float_to_srt_time_format(3.89) -> '00:00:03,890'
+        """
+        fraction, whole = math.modf(d)
+        time_fmt = time.strftime("%H:%M:%S,", time.gmtime(whole))
+        ms = f"{fraction:.3f}".replace("0.", "")
+        return time_fmt + ms
+    def xml_caption_to_srt(self, xml_captions: str) -> str:
+        """Convert xml caption tracks to "SubRip Subtitle (srt)".
+        :param str xml_captions:
+            XML formatted caption tracks.
+        """
+        segments = []
+        root = ElementTree.fromstring(xml_captions)
+        for i, child in enumerate(list(root)):
+            text = child.text or ""
+            caption = unescape(text.replace("\n", " ").replace("  ", " "),)
+            try:
+                duration = float(child.attrib["dur"])
+            except KeyError:
+                duration = 0.0
+            start = float(child.attrib["start"])
+            end = start + duration
+            sequence_number = i + 1  # convert from 0-indexed to 1.
+            line = "{seq}\n{start} --> {end}\n{text}\n".format(
+                seq=sequence_number,
+                start=self.float_to_srt_time_format(start),
+                end=self.float_to_srt_time_format(end),
+                text=caption,
+            )
+            segments.append(line)
+        return "\n".join(segments).strip()
+    def download(
+        self,
+        title: str,
+        srt: bool = True,
+        output_path: Optional[str] = None,
+        filename_prefix: Optional[str] = None,
+    ) -> str:
+        """Write the media stream to disk.
+        :param title:
+            Output filename (stem only) for writing media file.
+            If one is not specified, the default filename is used.
+        :type title: str
+        :param srt:
+            Set to True to download srt, false to download xml. Defaults to True.
+        :type srt bool
+        :param output_path:
+            (optional) Output path for writing media file. If one is not
+            specified, defaults to the current working directory.
+        :type output_path: str or None
+        :param filename_prefix:
+            (optional) A string that will be prepended to the filename.
+            For example a number in a playlist or the name of a series.
+            If one is not specified, nothing will be prepended
+            This is separate from filename so you can use the default
+            filename but still add a prefix.
+        :type filename_prefix: str or None
+        :rtype: str
+        """
+        if title.endswith(".srt") or title.endswith(".xml"):
+            filename = ".".join(title.split(".")[:-1])
+        else:
+            filename = title
+        if filename_prefix:
+            filename = f"{safe_filename(filename_prefix)}{filename}"
+        filename = safe_filename(filename)
+        filename += f" ({self.code})"
+        if srt:
+            filename += ".srt"
+        else:
+            filename += ".xml"
+        file_path = os.path.join(target_directory(output_path), filename)
+        with open(file_path, "w", encoding="utf-8") as file_handle:
+            if srt:
+                file_handle.write(self.generate_srt_captions())
+            else:
+                file_handle.write(self.xml_captions)
+        return file_path
+    def __repr__(self):
+        """Printable object representation."""
+        return '<Caption lang="{s.name}" code="{s.code}">'.format(s=self)

pytube/cipher.py ADDED Viewed

	@@ -0,0 +1,697 @@

+"""
+This module contains all logic necessary to decipher the signature.
+YouTube's strategy to restrict downloading videos is to send a ciphered version
+of the signature to the client, along with the decryption algorithm obfuscated
+in JavaScript. For the clients to play the videos, JavaScript must take the
+ciphered version, cycle it through a series of "transform functions," and then
+sign the media URL with the output.
+This module is responsible for (1) finding and extracting those "transform
+functions", (2) mapping them to Python equivalents and (3) taking the ciphered
+signature and decoding it.
+"""
+import logging
+import re
+from itertools import chain
+from typing import Any, Callable, Dict, List, Optional, Tuple
+from pytube.exceptions import ExtractError, RegexMatchError
+from pytube.helpers import cache, regex_search
+from pytube.parser import find_object_from_startpoint, throttling_array_split
+logger = logging.getLogger(__name__)
+class Cipher:
+    def __init__(self, js: str):
+        self.transform_plan: List[str] = get_transform_plan(js)
+        var_regex = re.compile(r"^\w+\W")
+        var_match = var_regex.search(self.transform_plan[0])
+        if not var_match:
+            raise RegexMatchError(
+                caller="__init__", pattern=var_regex.pattern
+            )
+        var = var_match.group(0)[:-1]
+        self.transform_map = get_transform_map(js, var)
+        self.js_func_patterns = [
+            r"\w+\.(\w+)\(\w,(\d+)\)",
+            r"\w+\[(\"\w+\")\]\(\w,(\d+)\)"
+        ]
+        self.throttling_plan = get_throttling_plan(js)
+        self.throttling_array = get_throttling_function_array(js)
+        self.calculated_n = None
+    def calculate_n(self, initial_n: list):
+        """Converts n to the correct value to prevent throttling."""
+        if self.calculated_n:
+            return self.calculated_n
+        # First, update all instances of 'b' with the list(initial_n)
+        for i in range(len(self.throttling_array)):
+            if self.throttling_array[i] == 'b':
+                self.throttling_array[i] = initial_n
+        for step in self.throttling_plan:
+            curr_func = self.throttling_array[int(step[0])]
+            if not callable(curr_func):
+                logger.debug(f'{curr_func} is not callable.')
+                logger.debug(f'Throttling array:\n{self.throttling_array}\n')
+                raise ExtractError(f'{curr_func} is not callable.')
+            first_arg = self.throttling_array[int(step[1])]
+            if len(step) == 2:
+                curr_func(first_arg)
+            elif len(step) == 3:
+                second_arg = self.throttling_array[int(step[2])]
+                curr_func(first_arg, second_arg)
+        self.calculated_n = ''.join(initial_n)
+        return self.calculated_n
+    def get_signature(self, ciphered_signature: str) -> str:
+        """Decipher the signature.
+        Taking the ciphered signature, applies the transform functions.
+        :param str ciphered_signature:
+            The ciphered signature sent in the ``player_config``.
+        :rtype: str
+        :returns:
+            Decrypted signature required to download the media content.
+        """
+        signature = list(ciphered_signature)
+        for js_func in self.transform_plan:
+            name, argument = self.parse_function(js_func)  # type: ignore
+            signature = self.transform_map[name](signature, argument)
+            logger.debug(
+                "applied transform function\n"
+                "output: %s\n"
+                "js_function: %s\n"
+                "argument: %d\n"
+                "function: %s",
+                "".join(signature),
+                name,
+                argument,
+                self.transform_map[name],
+            )
+        return "".join(signature)
+    @cache
+    def parse_function(self, js_func: str) -> Tuple[str, int]:
+        """Parse the Javascript transform function.
+        Break a JavaScript transform function down into a two element ``tuple``
+        containing the function name and some integer-based argument.
+        :param str js_func:
+            The JavaScript version of the transform function.
+        :rtype: tuple
+        :returns:
+            two element tuple containing the function name and an argument.
+        **Example**:
+        parse_function('DE.AJ(a,15)')
+        ('AJ', 15)
+        """
+        logger.debug("parsing transform function")
+        for pattern in self.js_func_patterns:
+            regex = re.compile(pattern)
+            parse_match = regex.search(js_func)
+            if parse_match:
+                fn_name, fn_arg = parse_match.groups()
+                return fn_name, int(fn_arg)
+        raise RegexMatchError(
+            caller="parse_function", pattern="js_func_patterns"
+        )
+def get_initial_function_name(js: str) -> str:
+    """Extract the name of the function responsible for computing the signature.
+    :param str js:
+        The contents of the base.js asset file.
+    :rtype: str
+    :returns:
+        Function name from regex match
+    """
+    function_patterns = [
+        r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(",  # noqa: E501
+        r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*encodeURIComponent\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(",  # noqa: E501
+        r'(?:\b|[^a-zA-Z0-9$])(?P<sig>[a-zA-Z0-9$]{2})\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',  # noqa: E501
+        r'(?P<sig>[a-zA-Z0-9$]+)\s*=\s*function\(\s*a\s*\)\s*{\s*a\s*=\s*a\.split\(\s*""\s*\)',  # noqa: E501
+        r'(["\'])signature\1\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(',
+        r"\.sig\|\|(?P<sig>[a-zA-Z0-9$]+)\(",
+        r"yt\.akamaized\.net/\)\s*\|\|\s*.*?\s*[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?:encodeURIComponent\s*\()?\s*(?P<sig>[a-zA-Z0-9$]+)\(",  # noqa: E501
+        r"\b[cs]\s*&&\s*[adf]\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(",  # noqa: E501
+        r"\b[a-zA-Z0-9]+\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*(?P<sig>[a-zA-Z0-9$]+)\(",  # noqa: E501
+        r"\bc\s*&&\s*a\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(",  # noqa: E501
+        r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(",  # noqa: E501
+        r"\bc\s*&&\s*[a-zA-Z0-9]+\.set\([^,]+\s*,\s*\([^)]*\)\s*\(\s*(?P<sig>[a-zA-Z0-9$]+)\(",  # noqa: E501
+    ]
+    logger.debug("finding initial function name")
+    for pattern in function_patterns:
+        regex = re.compile(pattern)
+        function_match = regex.search(js)
+        if function_match:
+            logger.debug("finished regex search, matched: %s", pattern)
+            return function_match.group(1)
+    raise RegexMatchError(
+        caller="get_initial_function_name", pattern="multiple"
+    )
+def get_transform_plan(js: str) -> List[str]:
+    """Extract the "transform plan".
+    The "transform plan" is the functions that the ciphered signature is
+    cycled through to obtain the actual signature.
+    :param str js:
+        The contents of the base.js asset file.
+    **Example**:
+    ['DE.AJ(a,15)',
+    'DE.VR(a,3)',
+    'DE.AJ(a,51)',
+    'DE.VR(a,3)',
+    'DE.kT(a,51)',
+    'DE.kT(a,8)',
+    'DE.VR(a,3)',
+    'DE.kT(a,21)']
+    """
+    name = re.escape(get_initial_function_name(js))
+    pattern = r"%s=function\(\w\){[a-z=\.\(\"\)]*;(.*);(?:.+)}" % name
+    logger.debug("getting transform plan")
+    return regex_search(pattern, js, group=1).split(";")
+def get_transform_object(js: str, var: str) -> List[str]:
+    """Extract the "transform object".
+    The "transform object" contains the function definitions referenced in the
+    "transform plan". The ``var`` argument is the obfuscated variable name
+    which contains these functions, for example, given the function call
+    ``DE.AJ(a,15)`` returned by the transform plan, "DE" would be the var.
+    :param str js:
+        The contents of the base.js asset file.
+    :param str var:
+        The obfuscated variable name that stores an object with all functions
+        that descrambles the signature.
+    **Example**:
+    >>> get_transform_object(js, 'DE')
+    ['AJ:function(a){a.reverse()}',
+    'VR:function(a,b){a.splice(0,b)}',
+    'kT:function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}']
+    """
+    pattern = r"var %s={(.*?)};" % re.escape(var)
+    logger.debug("getting transform object")
+    regex = re.compile(pattern, flags=re.DOTALL)
+    transform_match = regex.search(js)
+    if not transform_match:
+        raise RegexMatchError(caller="get_transform_object", pattern=pattern)
+    return transform_match.group(1).replace("\n", " ").split(", ")
+def get_transform_map(js: str, var: str) -> Dict:
+    """Build a transform function lookup.
+    Build a lookup table of obfuscated JavaScript function names to the
+    Python equivalents.
+    :param str js:
+        The contents of the base.js asset file.
+    :param str var:
+        The obfuscated variable name that stores an object with all functions
+        that descrambles the signature.
+    """
+    transform_object = get_transform_object(js, var)
+    mapper = {}
+    for obj in transform_object:
+        # AJ:function(a){a.reverse()} => AJ, function(a){a.reverse()}
+        name, function = obj.split(":", 1)
+        fn = map_functions(function)
+        mapper[name] = fn
+    return mapper
+def get_throttling_function_name(js: str) -> str:
+    """Extract the name of the function that computes the throttling parameter.
+    :param str js:
+        The contents of the base.js asset file.
+    :rtype: str
+    :returns:
+        The name of the function used to compute the throttling parameter.
+    """
+    function_patterns = [
+        # https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-865985377
+        # https://github.com/yt-dlp/yt-dlp/commit/48416bc4a8f1d5ff07d5977659cb8ece7640dcd8
+        # var Bpa = [iha];
+        # ...
+        # a.C && (b = a.get("n")) && (b = Bpa[0](b), a.set("n", b),
+        # Bpa.length || iha("")) }};
+        # In the above case, `iha` is the relevant function name
+        r'a\.[a-zA-Z]\s*&&\s*\([a-z]\s*=\s*a\.get\("n"\)\)\s*&&\s*'
+        r'\([a-z]\s*=\s*([a-zA-Z0-9$]+)(\[\d+\])?\([a-z]\)',
+    ]
+    logger.debug('Finding throttling function name')
+    for pattern in function_patterns:
+        regex = re.compile(pattern)
+        function_match = regex.search(js)
+        if function_match:
+            logger.debug("finished regex search, matched: %s", pattern)
+            if len(function_match.groups()) == 1:
+                return function_match.group(1)
+            idx = function_match.group(2)
+            if idx:
+                idx = idx.strip("[]")
+                array = re.search(
+                    r'var {nfunc}\s*=\s*(\[.+?\]);'.format(
+                        nfunc=re.escape(function_match.group(1))),
+                    js
+                )
+                if array:
+                    array = array.group(1).strip("[]").split(",")
+                    array = [x.strip() for x in array]
+                    return array[int(idx)]
+    raise RegexMatchError(
+        caller="get_throttling_function_name", pattern="multiple"
+    )
+def get_throttling_function_code(js: str) -> str:
+    """Extract the raw code for the throttling function.
+    :param str js:
+        The contents of the base.js asset file.
+    :rtype: str
+    :returns:
+        The name of the function used to compute the throttling parameter.
+    """
+    # Begin by extracting the correct function name
+    name = re.escape(get_throttling_function_name(js))
+    # Identify where the function is defined
+    pattern_start = r"%s=function\(\w\)" % name
+    regex = re.compile(pattern_start)
+    match = regex.search(js)
+    # Extract the code within curly braces for the function itself, and merge any split lines
+    code_lines_list = find_object_from_startpoint(js, match.span()[1]).split('\n')
+    joined_lines = "".join(code_lines_list)
+    # Prepend function definition (e.g. `Dea=function(a)`)
+    return match.group(0) + joined_lines
+def get_throttling_function_array(js: str) -> List[Any]:
+    """Extract the "c" array.
+    :param str js:
+        The contents of the base.js asset file.
+    :returns:
+        The array of various integers, arrays, and functions.
+    """
+    raw_code = get_throttling_function_code(js)
+    array_start = r",c=\["
+    array_regex = re.compile(array_start)
+    match = array_regex.search(raw_code)
+    array_raw = find_object_from_startpoint(raw_code, match.span()[1] - 1)
+    str_array = throttling_array_split(array_raw)
+    converted_array = []
+    for el in str_array:
+        try:
+            converted_array.append(int(el))
+            continue
+        except ValueError:
+            # Not an integer value.
+            pass
+        if el == 'null':
+            converted_array.append(None)
+            continue
+        if el.startswith('"') and el.endswith('"'):
+            # Convert e.g. '"abcdef"' to string without quotation marks, 'abcdef'
+            converted_array.append(el[1:-1])
+            continue
+        if el.startswith('function'):
+            mapper = (
+                (r"{for\(\w=\(\w%\w\.length\+\w\.length\)%\w\.length;\w--;\)\w\.unshift\(\w.pop\(\)\)}", throttling_unshift),  # noqa:E501
+                (r"{\w\.reverse\(\)}", throttling_reverse),
+                (r"{\w\.push\(\w\)}", throttling_push),
+                (r";var\s\w=\w\[0\];\w\[0\]=\w\[\w\];\w\[\w\]=\w}", throttling_swap),
+                (r"case\s\d+", throttling_cipher_function),
+                (r"\w\.splice\(0,1,\w\.splice\(\w,1,\w\[0\]\)\[0\]\)", throttling_nested_splice),  # noqa:E501
+                (r";\w\.splice\(\w,1\)}", js_splice),
+                (r"\w\.splice\(-\w\)\.reverse\(\)\.forEach\(function\(\w\){\w\.unshift\(\w\)}\)", throttling_prepend),  # noqa:E501
+                (r"for\(var \w=\w\.length;\w;\)\w\.push\(\w\.splice\(--\w,1\)\[0\]\)}", throttling_reverse),  # noqa:E501
+            )
+            found = False
+            for pattern, fn in mapper:
+                if re.search(pattern, el):
+                    converted_array.append(fn)
+                    found = True
+            if found:
+                continue
+        converted_array.append(el)
+    # Replace null elements with array itself
+    for i in range(len(converted_array)):
+        if converted_array[i] is None:
+            converted_array[i] = converted_array
+    return converted_array
+def get_throttling_plan(js: str):
+    """Extract the "throttling plan".
+    The "throttling plan" is a list of tuples used for calling functions
+    in the c array. The first element of the tuple is the index of the
+    function to call, and any remaining elements of the tuple are arguments
+    to pass to that function.
+    :param str js:
+        The contents of the base.js asset file.
+    :returns:
+        The full function code for computing the throttlign parameter.
+    """
+    raw_code = get_throttling_function_code(js)
+    transform_start = r"try{"
+    plan_regex = re.compile(transform_start)
+    match = plan_regex.search(raw_code)
+    transform_plan_raw = find_object_from_startpoint(raw_code, match.span()[1] - 1)
+    # Steps are either c[x](c[y]) or c[x](c[y],c[z])
+    step_start = r"c\[(\d+)\]\(c\[(\d+)\](,c(\[(\d+)\]))?\)"
+    step_regex = re.compile(step_start)
+    matches = step_regex.findall(transform_plan_raw)
+    transform_steps = []
+    for match in matches:
+        if match[4] != '':
+            transform_steps.append((match[0],match[1],match[4]))
+        else:
+            transform_steps.append((match[0],match[1]))
+    return transform_steps
+def reverse(arr: List, _: Optional[Any]):
+    """Reverse elements in a list.
+    This function is equivalent to:
+    .. code-block:: javascript
+        function(a, b) { a.reverse() }
+    Unlike the JavaScript, the input is left untouched and a reversed copy
+    is returned. The second argument is required but unused, because the
+    transform functions are universally sent two arguments.
+    **Example**:
+    >>> reverse([1, 2, 3, 4], None)
+    [4, 3, 2, 1]
+    """
+    return arr[::-1]
+def splice(arr: List, b: int):
+    """Remove the first ``b`` items of a list.
+    This function is equivalent to:
+    .. code-block:: javascript
+        function(a, b) { a.splice(0, b) }
+    Unlike the JavaScript, the input is left untouched and the remaining
+    items are returned as a new list.
+    **Example**:
+    >>> splice([1, 2, 3, 4], 2)
+    [3, 4]
+    """
+    return arr[b:]
+def swap(arr: List, b: int):
+    """Swap positions at b modulus the list length.
+    This function is equivalent to:
+    .. code-block:: javascript
+        function(a, b) { var c=a[0];a[0]=a[b%a.length];a[b]=c }
+    **Example**:
+    >>> swap([1, 2, 3, 4], 2)
+    [3, 2, 1, 4]
+    """
+    r = b % len(arr)
+    return list(chain([arr[r]], arr[1:r], [arr[0]], arr[r + 1 :]))
+def throttling_reverse(arr: list):
+    """Reverses the input list.
+    Needs to do an in-place reversal so that the passed list gets changed.
+    To accomplish this, we create a reversed copy, and then change each
+    indvidual element.
+    """
+    reverse_copy = arr.copy()[::-1]
+    for i in range(len(reverse_copy)):
+        arr[i] = reverse_copy[i]
+def throttling_push(d: list, e: Any) -> None:
+    """Pushes an element onto a list.
+    ``e`` is appended to the end of ``d`` in place; nothing is returned.
+    """
+    d.append(e)
+def throttling_mod_func(d: list, e: int):
+    """Perform the modular function from the throttling array functions.
+    In the javascript, the modular operation is as follows:
+    e = (e % d.length + d.length) % d.length
+    We simply translate this to python here.
+    """
+    return (e % len(d) + len(d)) % len(d)
+def throttling_unshift(d: list, e: int):
+    """Rotates the elements of the list to the right.
+    In the javascript, the operation is as follows:
+    for(e=(e%d.length+d.length)%d.length;e--;)d.unshift(d.pop())
+    """
+    e = throttling_mod_func(d, e)
+    new_arr = d[-e:] + d[:-e]
+    d.clear()
+    for el in new_arr:
+        d.append(el)
+def throttling_cipher_function(d: list, e: str):
+    """This ciphers d with e to generate a new list.
+    In the javascript, the operation is as follows:
+    var h = [A-Za-z0-9-_], f = 96;  // simplified from switch-case loop
+    d.forEach(
+        function(l,m,n){
+            this.push(
+                n[m]=h[
+                    (h.indexOf(l)-h.indexOf(this[m])+m-32+f--)%h.length
+                ]
+            )
+        },
+        e.split("")
+    )
+    """
+    h = list('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_')
+    f = 96
+    # by naming it "this" we can more closely reflect the js
+    this = list(e)
+    # This is so we don't run into weirdness with enumerate while
+    #  we change the input list
+    copied_list = d.copy()
+    for m, l in enumerate(copied_list):
+        bracket_val = (h.index(l) - h.index(this[m]) + m - 32 + f) % len(h)
+        this.append(
+            h[bracket_val]
+        )
+        d[m] = h[bracket_val]
+        f -= 1
+def throttling_nested_splice(d: list, e: int):
+    """Nested splice function in throttling js.
+    In the javascript, the operation is as follows:
+    function(d,e){
+        e=(e%d.length+d.length)%d.length;
+        d.splice(
+            0,
+            1,
+            d.splice(
+                e,
+                1,
+                d[0]
+            )[0]
+        )
+    }
+    While testing, all this seemed to do is swap element 0 and e,
+    but the actual process is preserved in case there was an edge
+    case that was not considered.
+    """
+    e = throttling_mod_func(d, e)
+    inner_splice = js_splice(
+        d,
+        e,
+        1,
+        d[0]
+    )
+    js_splice(
+        d,
+        0,
+        1,
+        inner_splice[0]
+    )
+def throttling_prepend(d: list, e: int):
+    """
+    In the javascript, the operation is as follows:
+    function(d,e){
+        e=(e%d.length+d.length)%d.length;
+        d.splice(-e).reverse().forEach(
+            function(f){
+                d.unshift(f)
+            }
+        )
+    }
+    Effectively, this moves the last e elements of d to the beginning.
+    """
+    start_len = len(d)
+    # First, calculate e
+    e = throttling_mod_func(d, e)
+    # Then do the prepending
+    new_arr = d[-e:] + d[:-e]
+    # And update the input list
+    d.clear()
+    for el in new_arr:
+        d.append(el)
+    end_len = len(d)
+    assert start_len == end_len
+def throttling_swap(d: list, e: int):
+    """Swap positions of the 0'th and e'th elements in-place."""
+    e = throttling_mod_func(d, e)
+    f = d[0]
+    d[0] = d[e]
+    d[e] = f
def js_splice(arr: list, start: int, delete_count=None, *items):
    """Implementation of javascript's splice function.

    The list is modified in place, mirroring ``Array.prototype.splice``.

    :param list arr:
        Array to splice
    :param int start:
        Index at which to start changing the array. Values past the end are
        clamped to ``len(arr)``; negative values count back from the end and
        are clamped to 0; values that cannot be compared with an integer are
        treated as 0.
    :param int delete_count:
        Number of elements to delete from the array. ``None`` (omitted)
        deletes everything from ``start`` onwards; ``0`` or a negative value
        deletes nothing; larger values are clamped to the remaining length.
    :param *items:
        Items to add to the array at ``start``
    :rtype: list
    :returns:
        The elements that were removed from ``arr``.

    Reference: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/splice  # noqa:E501
    """
    length = len(arr)
    try:
        if start < 0:
            # Negative start counts backwards from the end (never below 0).
            start = max(length + start, 0)
        else:
            start = min(start, length)
    except TypeError:
        # Non-integer start values are treated as 0 in js
        start = 0

    remaining = length - start
    if delete_count is None:
        # Omitted delete_count removes everything from start to the end.
        delete_count = remaining
    else:
        # An explicit 0 must delete nothing, so only None means "omitted".
        delete_count = min(max(delete_count, 0), remaining)

    end = start + delete_count
    deleted_elements = arr[start:end]
    # Slice assignment removes the deleted run and inserts the new items
    # while keeping the caller's list object.
    arr[start:end] = items
    return deleted_elements
def map_functions(js_func: str) -> Callable:
    """Return the Python transform matching a JavaScript transform function.

    Each known JavaScript body shape is tried in order and the Python
    callable paired with the first matching pattern is returned.

    :param str js_func:
        The JavaScript version of the transform function.
    :raises RegexMatchError:
        If none of the known patterns is found in ``js_func``.
    """
    known_transforms = (
        # function(a){a.reverse()}
        (r"{\w\.reverse\(\)}", reverse),
        # function(a,b){a.splice(0,b)}
        (r"{\w\.splice\(0,\w\)}", splice),
        # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b]=c}
        (r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\]=\w}", swap),
        # function(a,b){var c=a[0];a[0]=a[b%a.length];a[b%a.length]=c}
        (
            r"{var\s\w=\w\[0\];\w\[0\]=\w\[\w\%\w.length\];\w\[\w\%\w.length\]=\w}",
            swap,
        ),
    )
    matched = next(
        (
            python_fn
            for regex, python_fn in known_transforms
            if re.search(regex, js_func)
        ),
        None,
    )
    if matched is None:
        raise RegexMatchError(caller="map_functions", pattern="multiple")
    return matched

pytube/cli.py ADDED Viewed

	@@ -0,0 +1,560 @@

+#!/usr/bin/env python3
+"""A simple command line application to download youtube videos."""
+import argparse
+import gzip
+import json
+import logging
+import os
+import shutil
+import sys
+import datetime as dt
+import subprocess  # nosec
+from typing import List, Optional
+import pytube.exceptions as exceptions
+from pytube import __version__
+from pytube import CaptionQuery, Playlist, Stream, YouTube
+from pytube.helpers import safe_filename, setup_logger
+logger = logging.getLogger(__name__)
def main():
    """Command line application to download youtube videos."""
    # NOTE: the docstring above is also passed to argparse as the program
    # description, so its text is shown to users in --help.
    # noinspection PyTypeChecker
    parser = argparse.ArgumentParser(description=main.__doc__)
    args = _parse_args(parser)

    if args.verbose:
        # A missing or empty --logfile means no log file is passed on.
        setup_logger(logging.DEBUG, log_filename=args.logfile or None)
        logger.debug(f'Pytube version: {__version__}')

    url = args.url
    if not url or "youtu" not in url:
        # No usable URL: show usage and exit with a failure status.
        parser.print_help()
        sys.exit(1)

    if "/playlist" not in url:
        print("Loading video...")
        _perform_args_on_youtube(YouTube(url), args)
        return

    print("Loading playlist...")
    playlist = Playlist(url)
    if not args.target:
        # Without an explicit target, use a name derived from the playlist title.
        args.target = safe_filename(playlist.title)
    for youtube_video in playlist.videos:
        try:
            _perform_args_on_youtube(youtube_video, args)
        except exceptions.PytubeError as e:
            # Report the failing video and carry on with the rest.
            print(f"There was an error with video: {youtube_video}")
            print(e)
def _perform_args_on_youtube(
    youtube: YouTube, args: argparse.Namespace
) -> None:
    """Run every action requested on the command line against one video.

    :param YouTube youtube:
        The video object to act on.
    :param argparse.Namespace args:
        Parsed command line options; each truthy option triggers its action.
    """
    # Exactly two argv entries means only one argument followed the program
    # name, so fall back to the default download.
    if len(sys.argv) == 2:
        download_highest_resolution_progressive(
            youtube=youtube, resolution="highest", target=args.target
        )

    # Informational actions.
    if args.list_captions:
        _print_available_captions(youtube.captions)
    if args.list:
        display_streams(youtube)
    if args.build_playback_report:
        build_playback_report(youtube)

    # Download actions; several may be requested in a single invocation.
    if args.itag:
        download_by_itag(youtube=youtube, itag=args.itag, target=args.target)
    if args.caption_code:
        download_caption(youtube=youtube, lang_code=args.caption_code, target=args.target)
    if args.resolution:
        download_by_resolution(youtube=youtube, resolution=args.resolution, target=args.target)
    if args.audio:
        download_audio(youtube=youtube, filetype=args.audio, target=args.target)
    if args.ffmpeg:
        ffmpeg_process(youtube=youtube, resolution=args.ffmpeg, target=args.target)
def _parse_args(
    parser: argparse.ArgumentParser, args: Optional[List] = None
) -> argparse.Namespace:
    """Register the command line options on ``parser`` and parse ``args``.

    :param argparse.ArgumentParser parser:
        Parser to which the options are added.
    :param list args:
        Argument strings to parse; when ``None`` argparse reads ``sys.argv``.
    :rtype: argparse.Namespace
    :returns:
        The parsed options.
    """
    parser.add_argument(
        "url", help="The YouTube /watch or /playlist url", nargs="?"
    )
    parser.add_argument(
        "--version", action="version", version="%(prog)s " + __version__,
    )
    parser.add_argument(
        "--itag", type=int, help="The itag for the desired stream",
    )
    parser.add_argument(
        "-r",
        "--resolution",
        type=str,
        help="The resolution for the desired stream",
    )
    parser.add_argument(
        "-l",
        "--list",
        action="store_true",
        help=(
            "The list option causes pytube cli to return a list of streams "
            "available to download"
        ),
    )
    parser.add_argument(
        "-v",
        "--verbose",
        action="store_true",
        dest="verbose",
        help="Set logger output to verbose output.",
    )
    parser.add_argument(
        "--logfile",
        action="store",
        help="logging debug and error messages into a log file",
    )
    parser.add_argument(
        "--build-playback-report",
        action="store_true",
        help="Save the html and js to disk",
    )
    parser.add_argument(
        "-c",
        "--caption-code",
        type=str,
        help=(
            "Download srt captions for given language code. "
            "Prints available language codes if no argument given"
        ),
    )
    parser.add_argument(
        '-lc',
        '--list-captions',
        action='store_true',
        help=(
            "List available caption codes for a video"
        )
    )
    parser.add_argument(
        "-t",
        "--target",
        help=(
            "The output directory for the downloaded stream. "
            "Default is current working directory"
        ),
    )
    # Adjacent string literals are concatenated verbatim, so each sentence
    # needs its own trailing ". " to keep the help text readable.
    parser.add_argument(
        "-a",
        "--audio",
        const="mp4",
        nargs="?",
        help=(
            "Download the audio for a given URL at the highest bitrate "
            "available. "
            "Defaults to mp4 format if none is specified"
        ),
    )
    parser.add_argument(
        "-f",
        "--ffmpeg",
        const="best",
        nargs="?",
        help=(
            "Downloads the audio and video stream for resolution provided. "
            "If no resolution is provided, downloads the best resolution. "
            "Runs the command line program ffmpeg to combine the audio and "
            "video"
        ),
    )
    return parser.parse_args(args)
def build_playback_report(youtube: YouTube) -> None:
    """Serialize the request data to json for offline debugging.

    The report is written as gzip-compressed JSON to
    ``yt-video-<video_id>-<unix timestamp>.json.gz`` in the current working
    directory.

    :param YouTube youtube:
        A YouTube object.
    """
    # Use an aware "now": calling .timestamp() on the naive value returned by
    # utcnow() interprets it as local time and skews the epoch by the UTC
    # offset.
    ts = int(dt.datetime.now(dt.timezone.utc).timestamp())
    fp = os.path.join(os.getcwd(), f"yt-video-{youtube.video_id}-{ts}.json.gz")
    # Gather everything before opening the file so that a failure while
    # reading from the YouTube object does not leave an empty archive behind.
    report = {
        "url": youtube.watch_url,
        "js": youtube.js,
        "watch_html": youtube.watch_html,
        "video_info": youtube.vid_info,
    }
    with gzip.open(fp, "wb") as fh:
        fh.write(json.dumps(report).encode("utf8"))
def display_progress_bar(
    bytes_received: int, filesize: int, ch: str = "█", scale: float = 0.55
) -> None:
    """Draw a single-line progress bar on stdout.

    The line ends with a carriage return, so repeated calls redraw the bar
    in place.

    Example:
    ~~~~~~~~
    PSY - GANGNAM STYLE(강남스타일) MV.mp4
    ↳ |███████████████████████████████████████| 100.0%

    :param int bytes_received:
        Number of bytes received so far.
    :param int filesize:
        File size of the media stream in bytes.
    :param str ch:
        Character to use for presenting progress segment.
    :param float scale:
        Fraction of the terminal width the bar may occupy.
    """
    total = float(filesize)
    bar_width = int(shutil.get_terminal_size().columns * scale)
    done_cells = int(round(bar_width * bytes_received / total))
    # The unfilled part of the bar is padded with spaces.
    bar = ch * done_cells + " " * (bar_width - done_cells)
    pct = round(100.0 * bytes_received / total, 1)
    sys.stdout.write(f" ↳ |{bar}| {pct}%\r")
    sys.stdout.flush()
# noinspection PyUnusedLocal
def on_progress(
    stream: Stream, chunk: bytes, bytes_remaining: int
) -> None:  # pylint: disable=W0613
    """Progress callback that redraws the progress bar.

    :param Stream stream:
        Stream being downloaded; only its ``filesize`` is read.
    :param bytes chunk:
        Unused here.
    :param int bytes_remaining:
        Number of bytes still to be received.
    """
    total = stream.filesize
    display_progress_bar(total - bytes_remaining, total)
def _download(
    stream: Stream,
    target: Optional[str] = None,
    filename: Optional[str] = None,
) -> None:
    """Download ``stream`` unless it is already present at its file path.

    :param Stream stream:
        Stream to download.
    :param str target:
        Output directory handed to the stream's path/download methods.
    :param str filename:
        Optional file name; the stream's default file name is shown when
        it is not given.
    """
    size_mb = stream.filesize // 1048576  # bytes -> whole MiB
    shown_name = filename or stream.default_filename
    print(f"{shown_name} | {size_mb} MB")

    destination = stream.get_file_path(filename=filename, output_path=target)
    if stream.exists_at_path(destination):
        print(f"Already downloaded at:\n{destination}")
        return

    stream.download(output_path=target, filename=filename)
    # Finish the line after the download (the progress bar ends in "\r").
    sys.stdout.write("\n")
+def _unique_name(base: str, subtype: str, media_type: str, target: str) -> str:
+    """
+    Given a base name, the file format, and the target directory, will generate
+    a filename unique for that directory and file format.
+    :param str base:
+        The given base-name.
+    :param str subtype:
+        The filetype of the video which will be downloaded.
+    :param str media_type:
+        The media_type of the file, ie. "audio" or "video"
+    :param Path target:
+        Target directory for download.
+    """
+    counter = 0
+    while True:
+        file_name = f"{base}_{media_type}_{counter}"
+        file_path = os.path.join(target, f"{file_name}.{subtype}")
+        if not os.path.exists(file_path):
+            return file_name
+        counter += 1
def ffmpeg_process(
    youtube: YouTube, resolution: str, target: Optional[str] = None
) -> None:
    """
    Decides the correct video stream to download, then calls _ffmpeg_downloader.

    For ``resolution == "best"`` the highest-resolution non-progressive
    stream is used, preferring the mp4 variant when it has that same
    resolution. Otherwise an mp4 stream with the requested resolution is
    preferred, falling back to any subtype. Exits the program when no
    matching video stream or no audio-only stream is found.

    :param YouTube youtube:
        A valid YouTube object.
    :param str resolution:
        YouTube video resolution.
    :param str target:
        Target directory for download; defaults to the current working
        directory.
    """
    youtube.register_on_progress_callback(on_progress)
    target = target or os.getcwd()

    if resolution == "best":
        highest_quality_stream = (
            youtube.streams.filter(progressive=False)
            .order_by("resolution")
            .last()
        )
        mp4_stream = (
            youtube.streams.filter(progressive=False, subtype="mp4")
            .order_by("resolution")
            .last()
        )
        # Either lookup may come back empty; only compare resolutions when
        # both streams exist so the "not found" message below is reached
        # instead of an AttributeError on None.
        if (
            highest_quality_stream is not None
            and mp4_stream is not None
            and highest_quality_stream.resolution == mp4_stream.resolution
        ):
            video_stream = mp4_stream
        else:
            video_stream = highest_quality_stream
    else:
        video_stream = youtube.streams.filter(
            progressive=False, resolution=resolution, subtype="mp4"
        ).first()
        if not video_stream:
            # No mp4 at this resolution: accept any subtype.
            video_stream = youtube.streams.filter(
                progressive=False, resolution=resolution
            ).first()

    if video_stream is None:
        print(f"Could not find a stream with resolution: {resolution}")
        print("Try one of these:")
        display_streams(youtube)
        sys.exit()

    # Prefer audio matching the video's subtype, then fall back to the last
    # audio-only stream when ordered by "abr".
    audio_stream = youtube.streams.get_audio_only(video_stream.subtype)
    if not audio_stream:
        audio_stream = (
            youtube.streams.filter(only_audio=True).order_by("abr").last()
        )
    if not audio_stream:
        print("Could not find an audio only stream")
        sys.exit()

    _ffmpeg_downloader(
        audio_stream=audio_stream, video_stream=video_stream, target=target
    )
def _ffmpeg_downloader(
    audio_stream: Stream, video_stream: Stream, target: str
) -> None:
    """
    Download the given video and audio streams under unique temporary names,
    run ffmpeg to combine them into a single file named after the video
    title, then delete the two intermediate files.

    :param Stream audio_stream:
        A valid Stream object representing the audio to download
    :param Stream video_stream:
        A valid Stream object representing the video to download
    :param str target:
        Directory in which all files are written
    """
    video_ext = video_stream.subtype
    audio_ext = audio_stream.subtype

    # Both temporary names are derived from the video title.
    video_unique_name = _unique_name(
        safe_filename(video_stream.title), video_ext, "video", target=target
    )
    audio_unique_name = _unique_name(
        safe_filename(video_stream.title), audio_ext, "audio", target=target
    )

    _download(stream=video_stream, target=target, filename=video_unique_name)
    print("Loading audio...")
    _download(stream=audio_stream, target=target, filename=audio_unique_name)

    video_path = os.path.join(target, f"{video_unique_name}.{video_ext}")
    audio_path = os.path.join(target, f"{audio_unique_name}.{audio_ext}")
    final_path = os.path.join(
        target, f"{safe_filename(video_stream.title)}.{video_ext}"
    )

    # "-codec copy" asks ffmpeg to copy both inputs without re-encoding.
    ffmpeg_cmd = [
        "ffmpeg",
        "-i",
        video_path,
        "-i",
        audio_path,
        "-codec",
        "copy",
        final_path,
    ]
    subprocess.run(ffmpeg_cmd)  # nosec

    # Remove the intermediate files.
    for leftover in (video_path, audio_path):
        os.unlink(leftover)
def download_by_itag(
    youtube: YouTube, itag: int, target: Optional[str] = None
) -> None:
    """Download the stream identified by ``itag``.

    When no such stream exists, the available streams are listed and the
    program exits. A keyboard interrupt during the download also exits.

    :param YouTube youtube:
        A valid YouTube object.
    :param int itag:
        YouTube format identifier code.
    :param str target:
        Target directory for download
    """
    chosen = youtube.streams.get_by_itag(itag)
    if chosen is None:
        print(f"Could not find a stream with itag: {itag}")
        print("Try one of these:")
        display_streams(youtube)
        sys.exit()

    youtube.register_on_progress_callback(on_progress)

    try:
        _download(chosen, target=target)
    except KeyboardInterrupt:
        # Ctrl-C: leave quietly without a traceback.
        sys.exit()
def download_by_resolution(
    youtube: YouTube, resolution: str, target: Optional[str] = None
) -> None:
    """Download the stream returned by ``get_by_resolution(resolution)``.

    When no such stream exists, the available streams are listed and the
    program exits. A keyboard interrupt during the download also exits.

    :param YouTube youtube:
        A valid YouTube object.
    :param str resolution:
        YouTube video resolution.
    :param str target:
        Target directory for download
    """
    # TODO(nficano): allow dash itags to be selected
    chosen = youtube.streams.get_by_resolution(resolution)
    if chosen is None:
        print(f"Could not find a stream with resolution: {resolution}")
        print("Try one of these:")
        display_streams(youtube)
        sys.exit()

    youtube.register_on_progress_callback(on_progress)

    try:
        _download(chosen, target=target)
    except KeyboardInterrupt:
        # Ctrl-C: leave quietly without a traceback.
        sys.exit()
def download_highest_resolution_progressive(
    youtube: YouTube, resolution: str, target: Optional[str] = None
) -> None:
    """Download the stream returned by ``get_highest_resolution()``.

    If the lookup raises ``VideoUnavailable`` a message is printed and
    nothing is downloaded. A keyboard interrupt during the download exits.

    :param YouTube youtube:
        A valid YouTube object.
    :param str resolution:
        Not used by this function; kept in the signature for callers.
    :param str target:
        Target directory for download
    """
    youtube.register_on_progress_callback(on_progress)

    try:
        best = youtube.streams.get_highest_resolution()
    except exceptions.VideoUnavailable as err:
        print(f"No video streams available: {err}")
        return

    try:
        _download(best, target=target)
    except KeyboardInterrupt:
        # Ctrl-C: leave quietly without a traceback.
        sys.exit()
def display_streams(youtube: YouTube) -> None:
    """Print every stream of the video, one per line.

    :param YouTube youtube:
        A valid YouTube object whose ``streams`` are listed.
    """
    for available in youtube.streams:
        print(available)
def _print_available_captions(captions: CaptionQuery) -> None:
    """Print the codes of all given captions as a comma-separated list.

    :param CaptionQuery captions:
        Iterable of caption objects; each one's ``code`` is printed.
    """
    codes = ', '.join(c.code for c in captions)
    print(f"Available caption codes are: {codes}")
def download_caption(
    youtube: YouTube, lang_code: Optional[str], target: Optional[str] = None
) -> None:
    """Download the caption track for ``lang_code`` and report where it went.

    :param YouTube youtube:
        A valid YouTube object.
    :param str lang_code:
        Language code desired for caption file.
        When the lookup raises ``KeyError`` (for example an unknown code),
        the available codes are printed instead.
    :param str target:
        Target directory for download
    """
    try:
        track = youtube.captions[lang_code]
        saved_to = track.download(title=youtube.title, output_path=target)
        print(f"Saved caption file to: {saved_to}")
    except KeyError:
        # Point the user at the codes that do exist.
        print(f"Unable to find caption with code: {lang_code}")
        _print_available_captions(youtube.captions)
def download_audio(
    youtube: YouTube, filetype: str, target: Optional[str] = None
) -> None:
    """
    Download the last audio-only stream of the given file type when the
    streams are ordered by "abr".

    When no such stream exists, the available streams are listed and the
    program exits. A keyboard interrupt during the download also exits.

    :param YouTube youtube:
        A valid YouTube object.
    :param str filetype:
        Desired file format to download.
    :param str target:
        Target directory for download
    """
    candidates = youtube.streams.filter(only_audio=True, subtype=filetype)
    best_audio = candidates.order_by("abr").last()
    if best_audio is None:
        print("No audio only stream found. Try one of these:")
        display_streams(youtube)
        sys.exit()

    youtube.register_on_progress_callback(on_progress)

    try:
        _download(best_audio, target=target)
    except KeyboardInterrupt:
        # Ctrl-C: leave quietly without a traceback.
        sys.exit()
+if __name__ == "__main__":
+    main()

pytube/contrib/__init__.py ADDED Viewed

File without changes

pytube/contrib/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (150 Bytes). View file

pytube/contrib/__pycache__/__init__.cpython-39.pyc ADDED Viewed

Binary file (150 Bytes). View file