Ferdowsi
/

pytube

Model card Files Files and versions Community

hbmartin committed on Feb 9, 2020

Commit

724e315

1 Parent(s): 61e7e36

add self.player_response and lazy caption init

Browse files

Files changed (7) hide show

pytube/__main__.py +18 -29
pytube/contrib/playlist.py +5 -2
pytube/monostate.py +1 -2
pytube/query.py +1 -0
pytube/streams.py +1 -1
pytube/version.py +1 -0
tests/test_streams.py +4 -10

pytube/__main__.py CHANGED Viewed

@@ -67,12 +67,12 @@ class YouTube:
         self.watch_html: Optional[str] = None  # the html of /watch?v=<video_id>
         self.embed_html: Optional[str] = None
         self.player_config_args: Dict = {}  # inline js in the html containing
         # streams
         self.age_restricted: Optional[bool] = None
         self.vid_descr: Optional[str] = None
         self.fmt_streams: List[Stream] = []
-        self.caption_tracks: List[Caption] = []
         # video_id part of /watch?v=<video_id>
         self.video_id = extract.video_id(url)
@@ -153,11 +153,9 @@ class YouTube:
             self.initialize_stream_objects(fmt)
         # load the player_response object (contains subtitle information)
-        self.player_config_args["player_response"] = json.loads(
-            self.player_config_args["player_response"]
-        )
-        self.initialize_caption_objects()
         logger.info("init finished successfully")
     def prefetch(self) -> None:
@@ -173,8 +171,7 @@ class YouTube:
         self.watch_html = request.get(url=self.watch_url)
         if (
             self.watch_html is None
-            or '<img class="icon meh" src="/yts/img'  # noqa: W503
-            not in self.watch_html  # noqa: W503
         ):
             raise VideoUnavailable(video_id=self.video_id)
@@ -214,26 +211,19 @@ class YouTube:
             )
             self.fmt_streams.append(video)
-    def initialize_caption_objects(self) -> None:
-        """Populate instances of :class:`Caption <Caption>`.
-        Take the unscrambled player response data, and use it to initialize
-        instances of :class:`Caption <Caption>`.
-        :rtype: None
         """
-        if "captions" not in self.player_config_args["player_response"]:
-            return
-        # https://github.com/nficano/pytube/issues/167
-        caption_tracks = (
-            self.player_config_args.get("player_response", {})
             .get("captions", {})
             .get("playerCaptionsTracklistRenderer", {})
             .get("captionTracks", [])
         )
-        for caption_track in caption_tracks:
-            self.caption_tracks.append(Caption(caption_track))
     @property
     def captions(self) -> CaptionQuery:
@@ -258,9 +248,8 @@ class YouTube:
         :rtype: str
         """
-        player_response = self.player_config_args.get("player_response", {})
         thumbnail_details = (
-            player_response.get("videoDetails", {})
             .get("thumbnail", {})
             .get("thumbnails")
         )
@@ -278,7 +267,7 @@ class YouTube:
         """
         return self.player_config_args.get("title") or (
-            self.player_config_args.get("player_response", {})
             .get("videoDetails", {})
             .get("title")
         )
@@ -291,7 +280,7 @@ class YouTube:
         """
         return self.vid_descr or (
-            self.player_config_args.get("player_response", {})
             .get("videoDetails", {})
             .get("shortDescription")
         )
@@ -304,7 +293,7 @@ class YouTube:
         """
         return (
-            self.player_config_args.get("player_response", {})
             .get("videoDetails", {})
             .get("averageRating")
         )
@@ -319,7 +308,7 @@ class YouTube:
         return int(
             self.player_config_args.get("length_seconds")
             or (
-                self.player_config_args.get("player_response", {})
                 .get("videoDetails", {})
                 .get("lengthSeconds")
             )
@@ -333,7 +322,7 @@ class YouTube:
         """
         return int(
-            self.player_config_args.get("player_response", {})
             .get("videoDetails", {})
             .get("viewCount")
         )
@@ -344,7 +333,7 @@ class YouTube:
         :rtype: str
         """
         return (
-            self.player_config_args.get("player_response", {})
             .get("videoDetails", {})
             .get("author", "unknown")
         )

         self.watch_html: Optional[str] = None  # the html of /watch?v=<video_id>
         self.embed_html: Optional[str] = None
         self.player_config_args: Dict = {}  # inline js in the html containing
+        self.player_response: Dict = {}
         # streams
         self.age_restricted: Optional[bool] = None
         self.vid_descr: Optional[str] = None
         self.fmt_streams: List[Stream] = []
         # video_id part of /watch?v=<video_id>
         self.video_id = extract.video_id(url)
             self.initialize_stream_objects(fmt)
         # load the player_response object (contains subtitle information)
+        self.player_response = json.loads(self.player_config_args["player_response"])
+        del self.player_config_args["player_response"]
         logger.info("init finished successfully")
     def prefetch(self) -> None:
         self.watch_html = request.get(url=self.watch_url)
         if (
             self.watch_html is None
+            or '<img class="icon meh" src="/yts/img' not in self.watch_html
         ):
             raise VideoUnavailable(video_id=self.video_id)
             )
             self.fmt_streams.append(video)
+    @property
+    def caption_tracks(self) -> List[Caption]:
+        """Get a list of :class:`Caption <Caption>`.
+        :rtype: List[Caption]
         """
+        raw_tracks = (
+            self.player_response
             .get("captions", {})
             .get("playerCaptionsTracklistRenderer", {})
             .get("captionTracks", [])
         )
+        return [Caption(track) for track in raw_tracks]
     @property
     def captions(self) -> CaptionQuery:
         :rtype: str
         """
         thumbnail_details = (
+            self.player_response.get("videoDetails", {})
             .get("thumbnail", {})
             .get("thumbnails")
         )
         """
         return self.player_config_args.get("title") or (
+            self.player_response
             .get("videoDetails", {})
             .get("title")
         )
         """
         return self.vid_descr or (
+            self.player_response
             .get("videoDetails", {})
             .get("shortDescription")
         )
         """
         return (
+            self.player_response
             .get("videoDetails", {})
             .get("averageRating")
         )
         return int(
             self.player_config_args.get("length_seconds")
             or (
+                self.player_response
                 .get("videoDetails", {})
                 .get("lengthSeconds")
             )
         """
         return int(
+            self.player_response
             .get("videoDetails", {})
             .get("viewCount")
         )
         :rtype: str
         """
         return (
+            self.player_response
             .get("videoDetails", {})
             .get("author", "unknown")
         )

pytube/contrib/playlist.py CHANGED Viewed

@@ -45,8 +45,7 @@ class Playlist:
     @staticmethod
     def _find_load_more_url(req: str) -> Optional[str]:
-        """Given an html page or fragment, returns the "load more" url if found.
-        """
         match = re.search(
             r"data-uix-load-more-href=\"(/browse_ajax\?" 'action_continuation=.*?)"',
             req,
@@ -58,6 +57,10 @@ class Playlist:
     @deprecated("This function will be removed in the future, please use .video_urls")
     def parse_links(self) -> List[str]:  # pragma: no cover
         return self.video_urls
     def _paginate(self, until_watch_id: Optional[str] = None) -> Iterable[List[str]]:

     @staticmethod
     def _find_load_more_url(req: str) -> Optional[str]:
+        """Given an html page or fragment, returns the "load more" url if found."""
         match = re.search(
             r"data-uix-load-more-href=\"(/browse_ajax\?" 'action_continuation=.*?)"',
             req,
     @deprecated("This function will be removed in the future, please use .video_urls")
     def parse_links(self) -> List[str]:  # pragma: no cover
+        """ Deprecated function for returning list of URLs
+        :return: List[str]
+        """
         return self.video_urls
     def _paginate(self, until_watch_id: Optional[str] = None) -> Iterable[List[str]]:

pytube/monostate.py CHANGED Viewed

@@ -1,9 +1,8 @@
 import io
 from typing import Any, Optional
 from typing_extensions import Protocol
-# from __future__ import annotations
 class OnProgress(Protocol):
     def __call__(

+# -*- coding: utf-8 -*-
 import io
 from typing import Any, Optional
 from typing_extensions import Protocol
 class OnProgress(Protocol):
     def __call__(

pytube/query.py CHANGED Viewed

@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
 """This module provides a query interface for media streams and captions."""
 from typing import List, Optional

 # -*- coding: utf-8 -*-
 """This module provides a query interface for media streams and captions."""
 from typing import List, Optional

pytube/streams.py CHANGED Viewed

@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
 """
 This module contains a container for stream manifest data.
@@ -169,7 +170,6 @@ class Stream:
         :returns:
             An os file system compatible filename.
         """
         filename = safe_filename(self.title)
         return f"{filename}.{self.subtype}"

 # -*- coding: utf-8 -*-
 """
 This module contains a container for stream manifest data.
         :returns:
             An os file system compatible filename.
         """
         filename = safe_filename(self.title)
         return f"{filename}.{self.subtype}"

pytube/version.py CHANGED Viewed

@@ -1,3 +1,4 @@
 __version__ = "9.6.1"
 if __name__ == "__main__":

+# -*- coding: utf-8 -*-
 __version__ = "9.6.1"
 if __name__ == "__main__":

tests/test_streams.py CHANGED Viewed

@@ -199,29 +199,23 @@ def test_on_complete_hook(cipher_signature, mocker):
 def test_author(cipher_signature):
     expected = "Test author"
-    cipher_signature.player_config_args = {
-        "player_response": {"videoDetails": {"author": expected}}
-    }
     assert cipher_signature.author == expected
     expected = "unknown"
-    cipher_signature.player_config_args = {}
     assert cipher_signature.author == expected
 def test_thumbnail_when_in_details(cipher_signature):
     expected = "some url"
-    cipher_signature.player_config_args = {
-        "player_response": {
-            "videoDetails": {"thumbnail": {"thumbnails": [{"url": expected}]}}
-        }
-    }
     assert cipher_signature.thumbnail_url == expected
 def test_thumbnail_when_not_in_details(cipher_signature):
     expected = "https://img.youtube.com/vi/9bZkp7q19f0/maxresdefault.jpg"
-    cipher_signature.player_config_args = {}
     assert cipher_signature.thumbnail_url == expected

 def test_author(cipher_signature):
     expected = "Test author"
+    cipher_signature.player_response = {"videoDetails": {"author": expected}}
     assert cipher_signature.author == expected
     expected = "unknown"
+    cipher_signature.player_response = {}
     assert cipher_signature.author == expected
 def test_thumbnail_when_in_details(cipher_signature):
     expected = "some url"
+    cipher_signature.player_response = {"videoDetails": {"thumbnail": {"thumbnails": [{"url": expected}]}}}
     assert cipher_signature.thumbnail_url == expected
 def test_thumbnail_when_not_in_details(cipher_signature):
     expected = "https://img.youtube.com/vi/9bZkp7q19f0/maxresdefault.jpg"
+    cipher_signature.player_response = {}
     assert cipher_signature.thumbnail_url == expected