Taylor Fox Dahlin committed on
Commit
8f73fed
·
unverified ·
1 Parent(s): 851eb99

Bugfixes (#1058)

Browse files

* Add a catch for suggested search results ('people also searched for' and 'did you mean' renderers are now skipped); also accounts for the edge case of a result with no views.

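* Added exception handling for incorrect cached js files: if applying the signature fails with an ExtractError, the js cache is cleared and a fresh js file is fetched before retrying.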

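* Now allows you to *actually* set filenames: a provided filename (and prefix) is used exactly as given, instead of being partially overridden to create a safe filename with the stream's extension appended.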

* Innertube improvements, and skeleton code for future innertube work

pytube/__main__.py CHANGED
@@ -220,7 +220,17 @@ class YouTube:
220
  extract.apply_descrambler(self.vid_info, fmt)
221
  extract.apply_descrambler(self.player_config_args, fmt)
222
 
223
- extract.apply_signature(self.player_config_args, fmt, self.js)
 
 
 
 
 
 
 
 
 
 
224
 
225
  # build instances of :class:`Stream <Stream>`
226
  # Initialize stream objects
 
220
  extract.apply_descrambler(self.vid_info, fmt)
221
  extract.apply_descrambler(self.player_config_args, fmt)
222
 
223
+ # If the cached js doesn't work, try fetching a new js file
224
+ # https://github.com/pytube/pytube/issues/1054
225
+ try:
226
+ extract.apply_signature(self.player_config_args, fmt, self.js)
227
+ except exceptions.ExtractError:
228
+ # To force an update to the js file, we clear the cache and retry
229
+ self._js = None
230
+ self._js_url = None
231
+ pytube.__js__ = None
232
+ pytube.__js_url__ = None
233
+ extract.apply_signature(self.player_config_args, fmt, self.js)
234
 
235
  # build instances of :class:`Stream <Stream>`
236
  # Initialize stream objects
pytube/cipher.py CHANGED
@@ -17,7 +17,7 @@ import re
17
  from itertools import chain
18
  from typing import Any, Callable, Dict, List, Optional, Tuple
19
 
20
- from pytube.exceptions import RegexMatchError
21
  from pytube.helpers import cache, regex_search
22
  from pytube.parser import find_object_from_startpoint, throttling_array_split
23
 
@@ -60,7 +60,7 @@ class Cipher:
60
  if not callable(curr_func):
61
  logger.debug(f'{curr_func} is not callable.')
62
  logger.debug(f'Throttling array:\n{self.throttling_array}\n')
63
- raise TypeError(f'{curr_func} is not callable.')
64
 
65
  first_arg = self.throttling_array[int(step[1])]
66
 
 
17
  from itertools import chain
18
  from typing import Any, Callable, Dict, List, Optional, Tuple
19
 
20
+ from pytube.exceptions import ExtractError, RegexMatchError
21
  from pytube.helpers import cache, regex_search
22
  from pytube.parser import find_object_from_startpoint, throttling_array_split
23
 
 
60
  if not callable(curr_func):
61
  logger.debug(f'{curr_func} is not callable.')
62
  logger.debug(f'Throttling array:\n{self.throttling_array}\n')
63
+ raise ExtractError(f'{curr_func} is not callable.')
64
 
65
  first_arg = self.throttling_array[int(step[1])]
66
 
pytube/contrib/search.py CHANGED
@@ -137,6 +137,14 @@ class Search:
137
  if 'channelRenderer' in video_details:
138
  continue
139
 
 
 
 
 
 
 
 
 
140
  if 'videoRenderer' not in video_details:
141
  logger.warn('Unexpected renderer encountered.')
142
  logger.warn(f'Renderer name: {video_details.keys()}')
@@ -166,7 +174,11 @@ class Search:
166
  else:
167
  vid_view_count_text = vid_renderer['viewCountText']['simpleText']
168
  # Strip ' views' text, then remove commas
169
- vid_view_count = int(vid_view_count_text.split()[0].replace(',',''))
 
 
 
 
170
  else:
171
  vid_view_count = 0
172
  if 'lengthText' in vid_renderer:
 
137
  if 'channelRenderer' in video_details:
138
  continue
139
 
140
+ # Skip 'people also searched for' results
141
+ if 'horizontalCardListRenderer' in video_details:
142
+ continue
143
+
144
+ # Can't seem to reproduce, probably related to typo fix suggestions
145
+ if 'didYouMeanRenderer' in video_details:
146
+ continue
147
+
148
  if 'videoRenderer' not in video_details:
149
  logger.warn('Unexpected renderer encountered.')
150
  logger.warn(f'Renderer name: {video_details.keys()}')
 
174
  else:
175
  vid_view_count_text = vid_renderer['viewCountText']['simpleText']
176
  # Strip ' views' text, then remove commas
177
+ stripped_text = vid_view_count_text.split()[0].replace(',','')
178
+ if stripped_text == 'No':
179
+ vid_view_count = 0
180
+ else:
181
+ vid_view_count = int(stripped_text)
182
  else:
183
  vid_view_count = 0
184
  if 'lengthText' in vid_renderer:
pytube/innertube.py CHANGED
@@ -5,6 +5,7 @@ interfaces returns raw results. These should instead be parsed to extract
5
  the useful information for the end user.
6
  """
7
  # Native python imports
 
8
  import json
9
  from urllib import parse
10
 
@@ -12,8 +13,53 @@ from urllib import parse
12
  from pytube import request
13
 
14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  class InnerTube:
16
  """Object for interacting with the innertube API."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  @property
18
  def base_url(self):
19
  """Return the base url endpoint for the innertube API."""
@@ -23,30 +69,31 @@ class InnerTube:
23
  def base_data(self):
24
  """Return the base json data to transmit to the innertube API."""
25
  return {
26
- 'context': {
27
- 'client': {
28
- 'clientName': 'WEB',
29
- 'clientVersion': '2.20200720.00.02'
30
- }
31
- }
32
  }
33
 
34
  @property
35
  def base_params(self):
36
  """Return the base query parameters to transmit to the innertube API."""
37
  return {
38
- 'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
39
  }
40
 
41
  def _call_api(self, endpoint, query, data):
42
  """Make a request to a given endpoint with the provided query parameters and data."""
43
  endpoint_url = f'{endpoint}?{parse.urlencode(query)}'
 
 
 
 
 
 
 
 
44
  response = request._execute_request(
45
  endpoint_url,
46
  'POST',
47
- headers={
48
- 'Content-Type': 'application/json',
49
- },
50
  data=data
51
  )
52
  return json.loads(response.read())
@@ -122,3 +169,42 @@ class InnerTube:
122
  data['continuation'] = continuation
123
  data.update(self.base_data)
124
  return self._call_api(endpoint, query, data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  the useful information for the end user.
6
  """
7
  # Native python imports
8
+ from datetime import datetime
9
  import json
10
  from urllib import parse
11
 
 
13
  from pytube import request
14
 
15
 
16
+ _default_clients = {
17
+ 'WEB': {
18
+ 'context': {
19
+ 'client': {
20
+ 'clientName': 'WEB',
21
+ 'clientVersion': '2.20200720.00.02'
22
+ }
23
+ },
24
+ 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
25
+ },
26
+ 'ANDROID': {
27
+ 'context': {
28
+ 'client': {
29
+ 'clientName': 'ANDROID',
30
+ 'clientVersion': '16.20'
31
+ }
32
+ },
33
+ 'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
34
+ }
35
+ }
36
+ _token_timeout = 1800
37
+
38
+
39
  class InnerTube:
40
  """Object for interacting with the innertube API."""
41
+ def __init__(self, client='WEB', bearer_token=None):
42
+ self.context = _default_clients[client]['context']
43
+ self.api_key = _default_clients[client]['api_key']
44
+ self.bearer_token = bearer_token
45
+ self.last_refresh = None
46
+ self.refresh_bearer_token()
47
+
48
+ def refresh_bearer_token(self, force=False):
49
+ """Refreshes the OAuth token.
50
+
51
+ This is skeleton code for potential future functionality, so it is incomplete.
52
+ """
53
+ # Skip refresh if it's been less than 30 minutes
54
+ if self.last_refresh and not force:
55
+ # Use a 30-minute timer.
56
+ if (datetime.now() - self.last_refresh).total_seconds() < _token_timeout:
57
+ return
58
+
59
+ # TODO: Refresh the token
60
+
61
+ self.last_refresh = datetime.now()
62
+
63
  @property
64
  def base_url(self):
65
  """Return the base url endpoint for the innertube API."""
 
69
  def base_data(self):
70
  """Return the base json data to transmit to the innertube API."""
71
  return {
72
+ 'context': self.context
 
 
 
 
 
73
  }
74
 
75
  @property
76
  def base_params(self):
77
  """Return the base query parameters to transmit to the innertube API."""
78
  return {
79
+ 'key': self.api_key
80
  }
81
 
82
  def _call_api(self, endpoint, query, data):
83
  """Make a request to a given endpoint with the provided query parameters and data."""
84
  endpoint_url = f'{endpoint}?{parse.urlencode(query)}'
85
+ headers = {
86
+ 'Content-Type': 'application/json',
87
+ }
88
+ # Add the bearer token if applicable
89
+ if self.bearer_token:
90
+ self.refresh_bearer_token()
91
+ headers['authorization'] = f'Bearer {self.bearer_token}'
92
+
93
  response = request._execute_request(
94
  endpoint_url,
95
  'POST',
96
+ headers=headers,
 
 
97
  data=data
98
  )
99
  return json.loads(response.read())
 
169
  data['continuation'] = continuation
170
  data.update(self.base_data)
171
  return self._call_api(endpoint, query, data)
172
+
173
+ def verify_age(self, video_id):
174
+ """Make a request to the age_verify endpoint.
175
+
176
+ Notable examples of the types of video this verification step is for:
177
+ * https://www.youtube.com/watch?v=QLdAhwSBZ3w
178
+ * https://www.youtube.com/watch?v=hc0ZDaAZQT0
179
+
180
+ :param str video_id:
181
+ The video id to get player info for.
182
+ :rtype: dict
183
+ :returns:
184
+ Returns information that includes a URL for bypassing certain restrictions.
185
+ """
186
+ endpoint = f'{self.base_url}/verify_age'
187
+ data = {
188
+ 'nextEndpoint': {
189
+ 'urlEndpoint': {
190
+ 'url': f'/watch?v={video_id}'
191
+ }
192
+ },
193
+ 'setControvercy': True
194
+ }
195
+ data.update(self.base_data)
196
+ result = self._call_api(endpoint, self.base_params, data)
197
+ return result
198
+
199
+ def get_transcript(self, video_id):
200
+ """Make a request to the get_transcript endpoint.
201
+
202
+ This is likely related to captioning for videos, but is currently untested.
203
+ """
204
+ endpoint = f'{self.base_url}/get_transcript'
205
+ query = {
206
+ 'videoId': video_id,
207
+ }
208
+ query.update(self.base_params)
209
+ result = self._call_api(endpoint, query, self.base_data)
210
+ return result
pytube/streams.py CHANGED
@@ -282,16 +282,14 @@ class Stream:
282
 
283
  def get_file_path(
284
  self,
285
- filename: Optional[str],
286
- output_path: Optional[str],
287
  filename_prefix: Optional[str] = None,
288
  ) -> str:
289
- if filename:
290
- filename = f"{safe_filename(filename)}.{self.subtype}"
291
- else:
292
  filename = self.default_filename
293
  if filename_prefix:
294
- filename = f"{safe_filename(filename_prefix)}{filename}"
295
  return os.path.join(target_directory(output_path), filename)
296
 
297
  def exists_at_path(self, file_path: str) -> bool:
 
282
 
283
  def get_file_path(
284
  self,
285
+ filename: Optional[str] = None,
286
+ output_path: Optional[str] = None,
287
  filename_prefix: Optional[str] = None,
288
  ) -> str:
289
+ if not filename:
 
 
290
  filename = self.default_filename
291
  if filename_prefix:
292
+ filename = f"{filename_prefix}{filename}"
293
  return os.path.join(target_directory(output_path), filename)
294
 
295
  def exists_at_path(self, file_path: str) -> bool:
tests/test_streams.py CHANGED
@@ -111,7 +111,7 @@ def test_views(cipher_signature):
111
 
112
 
113
  @mock.patch(
114
- "pytube.streams.request.head", MagicMock(return_value={"content-length": "6796391"})
115
  )
116
  @mock.patch(
117
  "pytube.request.stream",
@@ -124,7 +124,7 @@ def test_download(cipher_signature):
124
 
125
 
126
  @mock.patch(
127
- "pytube.streams.request.head", MagicMock(return_value={"content-length": "16384"})
128
  )
129
  @mock.patch(
130
  "pytube.request.stream",
@@ -142,7 +142,7 @@ def test_download_with_prefix(cipher_signature):
142
 
143
 
144
  @mock.patch(
145
- "pytube.streams.request.head", MagicMock(return_value={"content-length": "16384"})
146
  )
147
  @mock.patch(
148
  "pytube.request.stream",
@@ -155,12 +155,12 @@ def test_download_with_filename(cipher_signature):
155
  file_path = stream.download(filename="cool name bro")
156
  assert file_path == os.path.join(
157
  "/target",
158
- "cool name bro.mp4"
159
  )
160
 
161
 
162
  @mock.patch(
163
- "pytube.streams.request.head", MagicMock(return_value={"content-length": "16384"})
164
  )
165
  @mock.patch(
166
  "pytube.request.stream",
@@ -181,7 +181,7 @@ def test_download_with_existing(cipher_signature):
181
 
182
 
183
  @mock.patch(
184
- "pytube.streams.request.head", MagicMock(return_value={"content-length": "16384"})
185
  )
186
  @mock.patch(
187
  "pytube.request.stream",
@@ -212,7 +212,7 @@ def test_progressive_streams_return_includes_video_track(cipher_signature):
212
 
213
 
214
  @mock.patch(
215
- "pytube.streams.request.head", MagicMock(return_value={"content-length": "16384"})
216
  )
217
  @mock.patch(
218
  "pytube.request.stream",
@@ -233,7 +233,7 @@ def test_on_progress_hook(cipher_signature):
233
 
234
 
235
  @mock.patch(
236
- "pytube.streams.request.head", MagicMock(return_value={"content-length": "16384"})
237
  )
238
  @mock.patch(
239
  "pytube.request.stream",
 
111
 
112
 
113
  @mock.patch(
114
+ "pytube.request.head", MagicMock(return_value={"content-length": "6796391"})
115
  )
116
  @mock.patch(
117
  "pytube.request.stream",
 
124
 
125
 
126
  @mock.patch(
127
+ "pytube.request.head", MagicMock(return_value={"content-length": "16384"})
128
  )
129
  @mock.patch(
130
  "pytube.request.stream",
 
142
 
143
 
144
  @mock.patch(
145
+ "pytube.request.head", MagicMock(return_value={"content-length": "16384"})
146
  )
147
  @mock.patch(
148
  "pytube.request.stream",
 
155
  file_path = stream.download(filename="cool name bro")
156
  assert file_path == os.path.join(
157
  "/target",
158
+ "cool name bro"
159
  )
160
 
161
 
162
  @mock.patch(
163
+ "pytube.request.head", MagicMock(return_value={"content-length": "16384"})
164
  )
165
  @mock.patch(
166
  "pytube.request.stream",
 
181
 
182
 
183
  @mock.patch(
184
+ "pytube.request.head", MagicMock(return_value={"content-length": "16384"})
185
  )
186
  @mock.patch(
187
  "pytube.request.stream",
 
212
 
213
 
214
  @mock.patch(
215
+ "pytube.request.head", MagicMock(return_value={"content-length": "16384"})
216
  )
217
  @mock.patch(
218
  "pytube.request.stream",
 
233
 
234
 
235
  @mock.patch(
236
+ "pytube.request.head", MagicMock(return_value={"content-length": "16384"})
237
  )
238
  @mock.patch(
239
  "pytube.request.stream",