Taylor Fox Dahlin
committed on
Bugfixes (#1058)
Browse files
* Add catch for suggested search results; accounts for edge case of no views on result.
* Added exception handling for incorrect cached js files.
* Now allows you to *actually* set filenames, instead of doing partial overrides to create safe filenames.
* Innertube improvements, and skeleton code for future innertube work
- pytube/__main__.py +11 -1
- pytube/cipher.py +2 -2
- pytube/contrib/search.py +13 -1
- pytube/innertube.py +96 -10
- pytube/streams.py +4 -6
- tests/test_streams.py +8 -8
pytube/__main__.py
CHANGED
@@ -220,7 +220,17 @@ class YouTube:
|
|
220 |
extract.apply_descrambler(self.vid_info, fmt)
|
221 |
extract.apply_descrambler(self.player_config_args, fmt)
|
222 |
|
223 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
224 |
|
225 |
# build instances of :class:`Stream <Stream>`
|
226 |
# Initialize stream objects
|
|
|
220 |
extract.apply_descrambler(self.vid_info, fmt)
|
221 |
extract.apply_descrambler(self.player_config_args, fmt)
|
222 |
|
223 |
+
# If the cached js doesn't work, try fetching a new js file
|
224 |
+
# https://github.com/pytube/pytube/issues/1054
|
225 |
+
try:
|
226 |
+
extract.apply_signature(self.player_config_args, fmt, self.js)
|
227 |
+
except exceptions.ExtractError:
|
228 |
+
# To force an update to the js file, we clear the cache and retry
|
229 |
+
self._js = None
|
230 |
+
self._js_url = None
|
231 |
+
pytube.__js__ = None
|
232 |
+
pytube.__js_url__ = None
|
233 |
+
extract.apply_signature(self.player_config_args, fmt, self.js)
|
234 |
|
235 |
# build instances of :class:`Stream <Stream>`
|
236 |
# Initialize stream objects
|
pytube/cipher.py
CHANGED
@@ -17,7 +17,7 @@ import re
|
|
17 |
from itertools import chain
|
18 |
from typing import Any, Callable, Dict, List, Optional, Tuple
|
19 |
|
20 |
-
from pytube.exceptions import RegexMatchError
|
21 |
from pytube.helpers import cache, regex_search
|
22 |
from pytube.parser import find_object_from_startpoint, throttling_array_split
|
23 |
|
@@ -60,7 +60,7 @@ class Cipher:
|
|
60 |
if not callable(curr_func):
|
61 |
logger.debug(f'{curr_func} is not callable.')
|
62 |
logger.debug(f'Throttling array:\n{self.throttling_array}\n')
|
63 |
-
raise
|
64 |
|
65 |
first_arg = self.throttling_array[int(step[1])]
|
66 |
|
|
|
17 |
from itertools import chain
|
18 |
from typing import Any, Callable, Dict, List, Optional, Tuple
|
19 |
|
20 |
+
from pytube.exceptions import ExtractError, RegexMatchError
|
21 |
from pytube.helpers import cache, regex_search
|
22 |
from pytube.parser import find_object_from_startpoint, throttling_array_split
|
23 |
|
|
|
60 |
if not callable(curr_func):
|
61 |
logger.debug(f'{curr_func} is not callable.')
|
62 |
logger.debug(f'Throttling array:\n{self.throttling_array}\n')
|
63 |
+
raise ExtractError(f'{curr_func} is not callable.')
|
64 |
|
65 |
first_arg = self.throttling_array[int(step[1])]
|
66 |
|
pytube/contrib/search.py
CHANGED
@@ -137,6 +137,14 @@ class Search:
|
|
137 |
if 'channelRenderer' in video_details:
|
138 |
continue
|
139 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
if 'videoRenderer' not in video_details:
|
141 |
logger.warn('Unexpected renderer encountered.')
|
142 |
logger.warn(f'Renderer name: {video_details.keys()}')
|
@@ -166,7 +174,11 @@ class Search:
|
|
166 |
else:
|
167 |
vid_view_count_text = vid_renderer['viewCountText']['simpleText']
|
168 |
# Strip ' views' text, then remove commas
|
169 |
-
|
|
|
|
|
|
|
|
|
170 |
else:
|
171 |
vid_view_count = 0
|
172 |
if 'lengthText' in vid_renderer:
|
|
|
137 |
if 'channelRenderer' in video_details:
|
138 |
continue
|
139 |
|
140 |
+
# Skip 'people also searched for' results
|
141 |
+
if 'horizontalCardListRenderer' in video_details:
|
142 |
+
continue
|
143 |
+
|
144 |
+
# Can't seem to reproduce, probably related to typo fix suggestions
|
145 |
+
if 'didYouMeanRenderer' in video_details:
|
146 |
+
continue
|
147 |
+
|
148 |
if 'videoRenderer' not in video_details:
|
149 |
logger.warn('Unexpected renderer encountered.')
|
150 |
logger.warn(f'Renderer name: {video_details.keys()}')
|
|
|
174 |
else:
|
175 |
vid_view_count_text = vid_renderer['viewCountText']['simpleText']
|
176 |
# Strip ' views' text, then remove commas
|
177 |
+
stripped_text = vid_view_count_text.split()[0].replace(',','')
|
178 |
+
if stripped_text == 'No':
|
179 |
+
vid_view_count = 0
|
180 |
+
else:
|
181 |
+
vid_view_count = int(stripped_text)
|
182 |
else:
|
183 |
vid_view_count = 0
|
184 |
if 'lengthText' in vid_renderer:
|
pytube/innertube.py
CHANGED
@@ -5,6 +5,7 @@ interfaces returns raw results. These should instead be parsed to extract
|
|
5 |
the useful information for the end user.
|
6 |
"""
|
7 |
# Native python imports
|
|
|
8 |
import json
|
9 |
from urllib import parse
|
10 |
|
@@ -12,8 +13,53 @@ from urllib import parse
|
|
12 |
from pytube import request
|
13 |
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
class InnerTube:
|
16 |
"""Object for interacting with the innertube API."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
@property
|
18 |
def base_url(self):
|
19 |
"""Return the base url endpoint for the innertube API."""
|
@@ -23,30 +69,31 @@ class InnerTube:
|
|
23 |
def base_data(self):
|
24 |
"""Return the base json data to transmit to the innertube API."""
|
25 |
return {
|
26 |
-
'context':
|
27 |
-
'client': {
|
28 |
-
'clientName': 'WEB',
|
29 |
-
'clientVersion': '2.20200720.00.02'
|
30 |
-
}
|
31 |
-
}
|
32 |
}
|
33 |
|
34 |
@property
|
35 |
def base_params(self):
|
36 |
"""Return the base query parameters to transmit to the innertube API."""
|
37 |
return {
|
38 |
-
'key':
|
39 |
}
|
40 |
|
41 |
def _call_api(self, endpoint, query, data):
|
42 |
"""Make a request to a given endpoint with the provided query parameters and data."""
|
43 |
endpoint_url = f'{endpoint}?{parse.urlencode(query)}'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
response = request._execute_request(
|
45 |
endpoint_url,
|
46 |
'POST',
|
47 |
-
headers=
|
48 |
-
'Content-Type': 'application/json',
|
49 |
-
},
|
50 |
data=data
|
51 |
)
|
52 |
return json.loads(response.read())
|
@@ -122,3 +169,42 @@ class InnerTube:
|
|
122 |
data['continuation'] = continuation
|
123 |
data.update(self.base_data)
|
124 |
return self._call_api(endpoint, query, data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
the useful information for the end user.
|
6 |
"""
|
7 |
# Native python imports
|
8 |
+
from datetime import datetime
|
9 |
import json
|
10 |
from urllib import parse
|
11 |
|
|
|
13 |
from pytube import request
|
14 |
|
15 |
|
16 |
+
_default_clients = {
|
17 |
+
'WEB': {
|
18 |
+
'context': {
|
19 |
+
'client': {
|
20 |
+
'clientName': 'WEB',
|
21 |
+
'clientVersion': '2.20200720.00.02'
|
22 |
+
}
|
23 |
+
},
|
24 |
+
'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
|
25 |
+
},
|
26 |
+
'ANDROID': {
|
27 |
+
'context': {
|
28 |
+
'client': {
|
29 |
+
'clientName': 'ANDROID',
|
30 |
+
'clientVersion': '16.20'
|
31 |
+
}
|
32 |
+
},
|
33 |
+
'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
|
34 |
+
}
|
35 |
+
}
|
36 |
+
_token_timeout = 1800
|
37 |
+
|
38 |
+
|
39 |
class InnerTube:
|
40 |
"""Object for interacting with the innertube API."""
|
41 |
+
def __init__(self, client='WEB', bearer_token=None):
|
42 |
+
self.context = _default_clients[client]['context']
|
43 |
+
self.api_key = _default_clients[client]['api_key']
|
44 |
+
self.bearer_token = bearer_token
|
45 |
+
self.last_refresh = None
|
46 |
+
self.refresh_bearer_token()
|
47 |
+
|
48 |
+
def refresh_bearer_token(self, force=False):
|
49 |
+
"""Refreshes the OAuth token.
|
50 |
+
|
51 |
+
This is skeleton code for potential future functionality, so it is incomplete.
|
52 |
+
"""
|
53 |
+
# Skip refresh if it's been less than 30 minutes
|
54 |
+
if self.last_refresh and not force:
|
55 |
+
# Use a 30-minute timer.
|
56 |
+
if (datetime.now() - self.last_refresh).total_seconds() < _token_timeout:
|
57 |
+
return
|
58 |
+
|
59 |
+
# TODO: Refresh the token
|
60 |
+
|
61 |
+
self.last_refresh = datetime.now()
|
62 |
+
|
63 |
@property
|
64 |
def base_url(self):
|
65 |
"""Return the base url endpoint for the innertube API."""
|
|
|
69 |
def base_data(self):
|
70 |
"""Return the base json data to transmit to the innertube API."""
|
71 |
return {
|
72 |
+
'context': self.context
|
|
|
|
|
|
|
|
|
|
|
73 |
}
|
74 |
|
75 |
@property
|
76 |
def base_params(self):
|
77 |
"""Return the base query parameters to transmit to the innertube API."""
|
78 |
return {
|
79 |
+
'key': self.api_key
|
80 |
}
|
81 |
|
82 |
def _call_api(self, endpoint, query, data):
|
83 |
"""Make a request to a given endpoint with the provided query parameters and data."""
|
84 |
endpoint_url = f'{endpoint}?{parse.urlencode(query)}'
|
85 |
+
headers = {
|
86 |
+
'Content-Type': 'application/json',
|
87 |
+
}
|
88 |
+
# Add the bearer token if applicable
|
89 |
+
if self.bearer_token:
|
90 |
+
self.refresh_bearer_token()
|
91 |
+
headers['authorization'] = f'Bearer {self.bearer_token}'
|
92 |
+
|
93 |
response = request._execute_request(
|
94 |
endpoint_url,
|
95 |
'POST',
|
96 |
+
headers=headers,
|
|
|
|
|
97 |
data=data
|
98 |
)
|
99 |
return json.loads(response.read())
|
|
|
169 |
data['continuation'] = continuation
|
170 |
data.update(self.base_data)
|
171 |
return self._call_api(endpoint, query, data)
|
172 |
+
|
173 |
+
def verify_age(self, video_id):
|
174 |
+
"""Make a request to the age_verify endpoint.
|
175 |
+
|
176 |
+
Notable examples of the types of video this verification step is for:
|
177 |
+
* https://www.youtube.com/watch?v=QLdAhwSBZ3w
|
178 |
+
* https://www.youtube.com/watch?v=hc0ZDaAZQT0
|
179 |
+
|
180 |
+
:param str video_id:
|
181 |
+
The video id to get player info for.
|
182 |
+
:rtype: dict
|
183 |
+
:returns:
|
184 |
+
Returns information that includes a URL for bypassing certain restrictions.
|
185 |
+
"""
|
186 |
+
endpoint = f'{self.base_url}/verify_age'
|
187 |
+
data = {
|
188 |
+
'nextEndpoint': {
|
189 |
+
'urlEndpoint': {
|
190 |
+
'url': f'/watch?v={video_id}'
|
191 |
+
}
|
192 |
+
},
|
193 |
+
'setControvercy': True
|
194 |
+
}
|
195 |
+
data.update(self.base_data)
|
196 |
+
result = self._call_api(endpoint, self.base_params, data)
|
197 |
+
return result
|
198 |
+
|
199 |
+
def get_transcript(self, video_id):
|
200 |
+
"""Make a request to the get_transcript endpoint.
|
201 |
+
|
202 |
+
This is likely related to captioning for videos, but is currently untested.
|
203 |
+
"""
|
204 |
+
endpoint = f'{self.base_url}/get_transcript'
|
205 |
+
query = {
|
206 |
+
'videoId': video_id,
|
207 |
+
}
|
208 |
+
query.update(self.base_params)
|
209 |
+
result = self._call_api(endpoint, query, self.base_data)
|
210 |
+
return result
|
pytube/streams.py
CHANGED
@@ -282,16 +282,14 @@ class Stream:
|
|
282 |
|
283 |
def get_file_path(
|
284 |
self,
|
285 |
-
filename: Optional[str],
|
286 |
-
output_path: Optional[str],
|
287 |
filename_prefix: Optional[str] = None,
|
288 |
) -> str:
|
289 |
-
if filename:
|
290 |
-
filename = f"{safe_filename(filename)}.{self.subtype}"
|
291 |
-
else:
|
292 |
filename = self.default_filename
|
293 |
if filename_prefix:
|
294 |
-
filename = f"{
|
295 |
return os.path.join(target_directory(output_path), filename)
|
296 |
|
297 |
def exists_at_path(self, file_path: str) -> bool:
|
|
|
282 |
|
283 |
def get_file_path(
|
284 |
self,
|
285 |
+
filename: Optional[str] = None,
|
286 |
+
output_path: Optional[str] = None,
|
287 |
filename_prefix: Optional[str] = None,
|
288 |
) -> str:
|
289 |
+
if not filename:
|
|
|
|
|
290 |
filename = self.default_filename
|
291 |
if filename_prefix:
|
292 |
+
filename = f"{filename_prefix}{filename}"
|
293 |
return os.path.join(target_directory(output_path), filename)
|
294 |
|
295 |
def exists_at_path(self, file_path: str) -> bool:
|
tests/test_streams.py
CHANGED
@@ -111,7 +111,7 @@ def test_views(cipher_signature):
|
|
111 |
|
112 |
|
113 |
@mock.patch(
|
114 |
-
"pytube.
|
115 |
)
|
116 |
@mock.patch(
|
117 |
"pytube.request.stream",
|
@@ -124,7 +124,7 @@ def test_download(cipher_signature):
|
|
124 |
|
125 |
|
126 |
@mock.patch(
|
127 |
-
"pytube.
|
128 |
)
|
129 |
@mock.patch(
|
130 |
"pytube.request.stream",
|
@@ -142,7 +142,7 @@ def test_download_with_prefix(cipher_signature):
|
|
142 |
|
143 |
|
144 |
@mock.patch(
|
145 |
-
"pytube.
|
146 |
)
|
147 |
@mock.patch(
|
148 |
"pytube.request.stream",
|
@@ -155,12 +155,12 @@ def test_download_with_filename(cipher_signature):
|
|
155 |
file_path = stream.download(filename="cool name bro")
|
156 |
assert file_path == os.path.join(
|
157 |
"/target",
|
158 |
-
"cool name bro
|
159 |
)
|
160 |
|
161 |
|
162 |
@mock.patch(
|
163 |
-
"pytube.
|
164 |
)
|
165 |
@mock.patch(
|
166 |
"pytube.request.stream",
|
@@ -181,7 +181,7 @@ def test_download_with_existing(cipher_signature):
|
|
181 |
|
182 |
|
183 |
@mock.patch(
|
184 |
-
"pytube.
|
185 |
)
|
186 |
@mock.patch(
|
187 |
"pytube.request.stream",
|
@@ -212,7 +212,7 @@ def test_progressive_streams_return_includes_video_track(cipher_signature):
|
|
212 |
|
213 |
|
214 |
@mock.patch(
|
215 |
-
"pytube.
|
216 |
)
|
217 |
@mock.patch(
|
218 |
"pytube.request.stream",
|
@@ -233,7 +233,7 @@ def test_on_progress_hook(cipher_signature):
|
|
233 |
|
234 |
|
235 |
@mock.patch(
|
236 |
-
"pytube.
|
237 |
)
|
238 |
@mock.patch(
|
239 |
"pytube.request.stream",
|
|
|
111 |
|
112 |
|
113 |
@mock.patch(
|
114 |
+
"pytube.request.head", MagicMock(return_value={"content-length": "6796391"})
|
115 |
)
|
116 |
@mock.patch(
|
117 |
"pytube.request.stream",
|
|
|
124 |
|
125 |
|
126 |
@mock.patch(
|
127 |
+
"pytube.request.head", MagicMock(return_value={"content-length": "16384"})
|
128 |
)
|
129 |
@mock.patch(
|
130 |
"pytube.request.stream",
|
|
|
142 |
|
143 |
|
144 |
@mock.patch(
|
145 |
+
"pytube.request.head", MagicMock(return_value={"content-length": "16384"})
|
146 |
)
|
147 |
@mock.patch(
|
148 |
"pytube.request.stream",
|
|
|
155 |
file_path = stream.download(filename="cool name bro")
|
156 |
assert file_path == os.path.join(
|
157 |
"/target",
|
158 |
+
"cool name bro"
|
159 |
)
|
160 |
|
161 |
|
162 |
@mock.patch(
|
163 |
+
"pytube.request.head", MagicMock(return_value={"content-length": "16384"})
|
164 |
)
|
165 |
@mock.patch(
|
166 |
"pytube.request.stream",
|
|
|
181 |
|
182 |
|
183 |
@mock.patch(
|
184 |
+
"pytube.request.head", MagicMock(return_value={"content-length": "16384"})
|
185 |
)
|
186 |
@mock.patch(
|
187 |
"pytube.request.stream",
|
|
|
212 |
|
213 |
|
214 |
@mock.patch(
|
215 |
+
"pytube.request.head", MagicMock(return_value={"content-length": "16384"})
|
216 |
)
|
217 |
@mock.patch(
|
218 |
"pytube.request.stream",
|
|
|
233 |
|
234 |
|
235 |
@mock.patch(
|
236 |
+
"pytube.request.head", MagicMock(return_value={"content-length": "16384"})
|
237 |
)
|
238 |
@mock.patch(
|
239 |
"pytube.request.stream",
|