Taylor Fox Dahlin commited on
Commit
79befd6
·
unverified ·
1 Parent(s): 48ea520

Fix/1033 (#1037)

Browse files

* JS emulation for calculating n value for stream URLs to fix throttling issue.

* Made parser HTMLParseError more informative.

* Add support for bracket start to parser.

* Helper for auto-generating necessary json files.

* Removed test that no longer seems to work.

.flake8 CHANGED
@@ -1,5 +1,5 @@
1
  [flake8]
2
- ignore = E231,E203,W503,Q000,WPS111,WPS305,WPS348,WPS602,D400,DAR201,S101,DAR101,C812,D104,I001,WPS306,WPS214,D401,WPS229,WPS420,WPS230,WPS414,WPS114,WPS226,WPS442,C819,WPS601,T001,RST304,WPS410,WPS428,A003,A002,I003,WPS221,WPS326,WPS201,S405,DAR301,WPS210,WPS202,WPS213,WPS301,P103,WPS407,WPS432,WPS211,S314,S310,S001,IF100,PT001,PT019
3
  max-line-length = 95
4
 
5
  [isort]
 
1
  [flake8]
2
+ ignore = E231,E203,W503,Q000,WPS111,WPS305,WPS348,WPS602,D400,DAR201,S101,DAR101,C812,D104,I001,WPS306,WPS214,D401,WPS229,WPS420,WPS230,WPS414,WPS114,WPS226,WPS442,C819,WPS601,T001,RST304,WPS410,WPS428,A003,A002,I003,WPS221,WPS326,WPS201,S405,DAR301,WPS210,WPS202,WPS213,WPS301,P103,WPS407,WPS432,WPS211,S314,S310,S001,IF100,PT001,PT019,R504
3
  max-line-length = 95
4
 
5
  [isort]
pytube/cipher.py CHANGED
@@ -19,6 +19,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple
19
 
20
  from pytube.exceptions import RegexMatchError
21
  from pytube.helpers import cache, regex_search
 
22
 
23
  logger = logging.getLogger(__name__)
24
 
@@ -39,6 +40,39 @@ class Cipher:
39
  r"\w+\[(\"\w+\")\]\(\w,(\d+)\)"
40
  ]
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def get_signature(self, ciphered_signature: str) -> str:
43
  """Decipher the signature.
44
 
@@ -218,6 +252,160 @@ def get_transform_map(js: str, var: str) -> Dict:
218
  return mapper
219
 
220
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  def reverse(arr: List, _: Optional[Any]):
222
  """Reverse elements in a list.
223
 
@@ -273,6 +461,198 @@ def swap(arr: List, b: int):
273
  return list(chain([arr[r]], arr[1:r], [arr[0]], arr[r + 1 :]))
274
 
275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  def map_functions(js_func: str) -> Callable:
277
  """For a given JavaScript transform function, return the Python equivalent.
278
 
 
19
 
20
  from pytube.exceptions import RegexMatchError
21
  from pytube.helpers import cache, regex_search
22
+ from pytube.parser import find_object_from_startpoint, throttling_array_split
23
 
24
  logger = logging.getLogger(__name__)
25
 
 
40
  r"\w+\[(\"\w+\")\]\(\w,(\d+)\)"
41
  ]
42
 
43
+ self.throttling_plan = get_throttling_plan(js)
44
+ self.throttling_array = get_throttling_function_array(js)
45
+
46
+ self.calculated_n = None
47
+
48
+ def calculate_n(self, initial_n: list):
49
+ """Converts n to the correct value to prevent throttling."""
50
+ if self.calculated_n:
51
+ return self.calculated_n
52
+
53
+ # First, update all instances of 'b' with the list(initial_n)
54
+ for i in range(len(self.throttling_array)):
55
+ if self.throttling_array[i] == 'b':
56
+ self.throttling_array[i] = initial_n
57
+
58
+ for step in self.throttling_plan:
59
+ curr_func = self.throttling_array[int(step[0])]
60
+ if not callable(curr_func):
61
+ logger.debug(f'{curr_func} is not callable.')
62
+ logger.debug(f'Throttling array:\n{self.throttling_array}\n')
63
+ raise TypeError(f'{curr_func} is not callable.')
64
+
65
+ first_arg = self.throttling_array[int(step[1])]
66
+
67
+ if len(step) == 2:
68
+ curr_func(first_arg)
69
+ elif len(step) == 3:
70
+ second_arg = self.throttling_array[int(step[2])]
71
+ curr_func(first_arg, second_arg)
72
+
73
+ self.calculated_n = ''.join(initial_n)
74
+ return self.calculated_n
75
+
76
  def get_signature(self, ciphered_signature: str) -> str:
77
  """Decipher the signature.
78
 
 
252
  return mapper
253
 
254
 
255
+ def get_throttling_function_name(js: str) -> str:
256
+ """Extract the name of the function that computes the throttling parameter.
257
+
258
+ :param str js:
259
+ The contents of the base.js asset file.
260
+ :rtype: str
261
+ :returns:
262
+ The name of the function used to compute the throttling parameter.
263
+ """
264
+ function_patterns = [
265
+ # https://github.com/ytdl-org/youtube-dl/issues/29326#issuecomment-865985377
266
+ # a.C&&(b=a.get("n"))&&(b=Dea(b),a.set("n",b))}};
267
+ # In above case, `Dea` is the relevant function name
268
+ r'a\.C&&\(b=a\.get\("n"\)\)&&\(b=([^(]+)\(b\),a\.set\("n",b\)\)}};',
269
+ ]
270
+ logger.debug('Finding throttling function name')
271
+ for pattern in function_patterns:
272
+ regex = re.compile(pattern)
273
+ function_match = regex.search(js)
274
+ if function_match:
275
+ logger.debug("finished regex search, matched: %s", pattern)
276
+ return function_match.group(1)
277
+
278
+ raise RegexMatchError(
279
+ caller="get_throttling_function_name", pattern="multiple"
280
+ )
281
+
282
+
283
+ def get_throttling_function_code(js: str) -> str:
284
+ """Extract the raw code for the throttling function.
285
+
286
+ :param str js:
287
+ The contents of the base.js asset file.
288
+ :rtype: str
289
+ :returns:
290
+ The name of the function used to compute the throttling parameter.
291
+ """
292
+ # Begin by extracting the correct function name
293
+ name = re.escape(get_throttling_function_name(js))
294
+
295
+ # Identify where the function is defined
296
+ pattern_start = r"%s=function\(\w\)" % name
297
+ regex = re.compile(pattern_start)
298
+ match = regex.search(js)
299
+
300
+ # Extract the code within curly braces for the function itself, and merge any split lines
301
+ code_lines_list = find_object_from_startpoint(js, match.span()[1]).split('\n')
302
+ joined_lines = "".join(code_lines_list)
303
+
304
+ # Prepend function definition (e.g. `Dea=function(a)`)
305
+ return match.group(0) + joined_lines
306
+
307
+
308
+ def get_throttling_function_array(js: str) -> List[Any]:
309
+ """Extract the "c" array.
310
+
311
+ :param str js:
312
+ The contents of the base.js asset file.
313
+ :returns:
314
+ The array of various integers, arrays, and functions.
315
+ """
316
+ raw_code = get_throttling_function_code(js)
317
+
318
+ array_start = r",c=\["
319
+ array_regex = re.compile(array_start)
320
+ match = array_regex.search(raw_code)
321
+
322
+ array_raw = find_object_from_startpoint(raw_code, match.span()[1] - 1)
323
+ str_array = throttling_array_split(array_raw)
324
+
325
+ converted_array = []
326
+ for el in str_array:
327
+ try:
328
+ converted_array.append(int(el))
329
+ continue
330
+ except ValueError:
331
+ # Not an integer value.
332
+ pass
333
+
334
+ if el == 'null':
335
+ converted_array.append(None)
336
+ continue
337
+
338
+ if el.startswith('"') and el.endswith('"'):
339
+ # Convert e.g. '"abcdef"' to string without quotation marks, 'abcdef'
340
+ converted_array.append(el[1:-1])
341
+ continue
342
+
343
+ if el.startswith('function'):
344
+ mapper = (
345
+ (r"{for\(\w=\(\w%\w\.length\+\w\.length\)%\w\.length;\w--;\)\w\.unshift\(\w.pop\(\)\)}", throttling_unshift), # noqa:E501
346
+ (r"{\w\.reverse\(\)}", throttling_reverse),
347
+ (r"{\w\.push\(\w\)}", throttling_push),
348
+ (r";var\s\w=\w\[0\];\w\[0\]=\w\[\w\];\w\[\w\]=\w}", throttling_swap),
349
+ (r"case\s\d+", throttling_cipher_function),
350
+ (r"\w\.splice\(0,1,\w\.splice\(\w,1,\w\[0\]\)\[0\]\)", throttling_nested_splice), # noqa:E501
351
+ (r";\w\.splice\(\w,1\)}", js_splice),
352
+ (r"\w\.splice\(-\w\)\.reverse\(\)\.forEach\(function\(\w\){\w\.unshift\(\w\)}\)", throttling_prepend), # noqa:E501
353
+ (r"for\(var \w=\w\.length;\w;\)\w\.push\(\w\.splice\(--\w,1\)\[0\]\)}", throttling_reverse), # noqa:E501
354
+ )
355
+
356
+ found = False
357
+ for pattern, fn in mapper:
358
+ if re.search(pattern, el):
359
+ converted_array.append(fn)
360
+ found = True
361
+ if found:
362
+ continue
363
+
364
+ converted_array.append(el)
365
+
366
+ # Replace null elements with array itself
367
+ for i in range(len(converted_array)):
368
+ if converted_array[i] is None:
369
+ converted_array[i] = converted_array
370
+
371
+ return converted_array
372
+
373
+
374
+ def get_throttling_plan(js: str):
375
+ """Extract the "throttling plan".
376
+
377
+ The "throttling plan" is a list of tuples used for calling functions
378
+ in the c array. The first element of the tuple is the index of the
379
+ function to call, and any remaining elements of the tuple are arguments
380
+ to pass to that function.
381
+
382
+ :param str js:
383
+ The contents of the base.js asset file.
384
+ :returns:
385
+ The full function code for computing the throttlign parameter.
386
+ """
387
+ raw_code = get_throttling_function_code(js)
388
+
389
+ transform_start = r"try{"
390
+ plan_regex = re.compile(transform_start)
391
+ match = plan_regex.search(raw_code)
392
+
393
+ transform_plan_raw = find_object_from_startpoint(raw_code, match.span()[1] - 1)
394
+
395
+ # Steps are either c[x](c[y]) or c[x](c[y],c[z])
396
+ step_start = r"c\[(\d+)\]\(c\[(\d+)\](,c(\[(\d+)\]))?\)"
397
+ step_regex = re.compile(step_start)
398
+ matches = step_regex.findall(transform_plan_raw)
399
+ transform_steps = []
400
+ for match in matches:
401
+ if match[4] != '':
402
+ transform_steps.append((match[0],match[1],match[4]))
403
+ else:
404
+ transform_steps.append((match[0],match[1]))
405
+
406
+ return transform_steps
407
+
408
+
409
  def reverse(arr: List, _: Optional[Any]):
410
  """Reverse elements in a list.
411
 
 
461
  return list(chain([arr[r]], arr[1:r], [arr[0]], arr[r + 1 :]))
462
 
463
 
464
+ def throttling_reverse(arr: list):
465
+ """Reverses the input list.
466
+
467
+ Needs to do an in-place reversal so that the passed list gets changed.
468
+ To accomplish this, we create a reversed copy, and then change each
469
+ indvidual element.
470
+ """
471
+ reverse_copy = arr.copy()[::-1]
472
+ for i in range(len(reverse_copy)):
473
+ arr[i] = reverse_copy[i]
474
+
475
+
476
+ def throttling_push(d: list, e: Any):
477
+ """Pushes an element onto a list."""
478
+ d.append(e)
479
+
480
+
481
+ def throttling_mod_func(d: list, e: int):
482
+ """Perform the modular function from the throttling array functions.
483
+
484
+ In the javascript, the modular operation is as follows:
485
+ e = (e % d.length + d.length) % d.length
486
+
487
+ We simply translate this to python here.
488
+ """
489
+ return (e % len(d) + len(d)) % len(d)
490
+
491
+
492
+ def throttling_unshift(d: list, e: int):
493
+ """Rotates the elements of the list to the right.
494
+
495
+ In the javascript, the operation is as follows:
496
+ for(e=(e%d.length+d.length)%d.length;e--;)d.unshift(d.pop())
497
+ """
498
+ e = throttling_mod_func(d, e)
499
+ new_arr = d[-e:] + d[:-e]
500
+ d.clear()
501
+ for el in new_arr:
502
+ d.append(el)
503
+
504
+
505
+ def throttling_cipher_function(d: list, e: str):
506
+ """This ciphers d with e to generate a new list.
507
+
508
+ In the javascript, the operation is as follows:
509
+ var h = [A-Za-z0-9-_], f = 96; // simplified from switch-case loop
510
+ d.forEach(
511
+ function(l,m,n){
512
+ this.push(
513
+ n[m]=h[
514
+ (h.indexOf(l)-h.indexOf(this[m])+m-32+f--)%h.length
515
+ ]
516
+ )
517
+ },
518
+ e.split("")
519
+ )
520
+ """
521
+ h = list('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_')
522
+ f = 96
523
+ # by naming it "this" we can more closely reflect the js
524
+ this = list(e)
525
+
526
+ # This is so we don't run into weirdness with enumerate while
527
+ # we change the input list
528
+ copied_list = d.copy()
529
+
530
+ for m, l in enumerate(copied_list):
531
+ bracket_val = (h.index(l) - h.index(this[m]) + m - 32 + f) % len(h)
532
+ this.append(
533
+ h[bracket_val]
534
+ )
535
+ d[m] = h[bracket_val]
536
+ f -= 1
537
+
538
+
539
+ def throttling_nested_splice(d: list, e: int):
540
+ """Nested splice function in throttling js.
541
+
542
+ In the javascript, the operation is as follows:
543
+ function(d,e){
544
+ e=(e%d.length+d.length)%d.length;
545
+ d.splice(
546
+ 0,
547
+ 1,
548
+ d.splice(
549
+ e,
550
+ 1,
551
+ d[0]
552
+ )[0]
553
+ )
554
+ }
555
+
556
+ While testing, all this seemed to do is swap element 0 and e,
557
+ but the actual process is preserved in case there was an edge
558
+ case that was not considered.
559
+ """
560
+ e = throttling_mod_func(d, e)
561
+ inner_splice = js_splice(
562
+ d,
563
+ e,
564
+ 1,
565
+ d[0]
566
+ )
567
+ js_splice(
568
+ d,
569
+ 0,
570
+ 1,
571
+ inner_splice[0]
572
+ )
573
+
574
+
575
+ def throttling_prepend(d: list, e: int):
576
+ """
577
+
578
+ In the javascript, the operation is as follows:
579
+ function(d,e){
580
+ e=(e%d.length+d.length)%d.length;
581
+ d.splice(-e).reverse().forEach(
582
+ function(f){
583
+ d.unshift(f)
584
+ }
585
+ )
586
+ }
587
+
588
+ Effectively, this moves the last e elements of d to the beginning.
589
+ """
590
+ start_len = len(d)
591
+ # First, calculate e
592
+ e = throttling_mod_func(d, e)
593
+
594
+ # Then do the prepending
595
+ new_arr = d[-e:] + d[:-e]
596
+
597
+ # And update the input list
598
+ d.clear()
599
+ for el in new_arr:
600
+ d.append(el)
601
+
602
+ end_len = len(d)
603
+ assert start_len == end_len
604
+
605
+
606
+ def throttling_swap(d: list, e: int):
607
+ """Swap positions of the 0'th and e'th elements in-place."""
608
+ e = throttling_mod_func(d, e)
609
+ f = d[0]
610
+ d[0] = d[e]
611
+ d[e] = f
612
+
613
+
614
+ def js_splice(arr: list, start: int, delete_count=None, *items):
615
+ """Implementation of javascript's splice function.
616
+
617
+ :param list arr:
618
+ Array to splice
619
+ :param int start:
620
+ Index at which to start changing the array
621
+ :param int delete_count:
622
+ Number of elements to delete from the array
623
+ :param *items:
624
+ Items to add to the array
625
+
626
+ Reference: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Array/splice # noqa:E501
627
+ """
628
+ # Special conditions for start value
629
+ try:
630
+ if start > len(arr):
631
+ start = len(arr)
632
+ # If start is negative, count backwards from end
633
+ if start < 0:
634
+ start = len(arr) - start
635
+ except TypeError:
636
+ # Non-integer start values are treated as 0 in js
637
+ start = 0
638
+
639
+ # Special condition when delete_count is greater than remaining elements
640
+ if not delete_count or delete_count >= len(arr) - start:
641
+ delete_count = len(arr) - start # noqa: N806
642
+
643
+ deleted_elements = arr[start:start + delete_count]
644
+
645
+ # Splice appropriately.
646
+ new_arr = arr[:start] + list(items) + arr[start + delete_count:]
647
+
648
+ # Replace contents of input array
649
+ arr.clear()
650
+ for el in new_arr:
651
+ arr.append(el)
652
+
653
+ return deleted_elements
654
+
655
+
656
  def map_functions(js_func: str) -> Callable:
657
  """For a given JavaScript transform function, return the Python equivalent.
658
 
pytube/extract.py CHANGED
@@ -6,7 +6,7 @@ import re
6
  from collections import OrderedDict
7
  from datetime import datetime
8
  from typing import Any, Dict, List, Optional, Tuple
9
- from urllib.parse import parse_qs, parse_qsl, quote, unquote, urlencode
10
 
11
  from pytube.cipher import Cipher
12
  from pytube.exceptions import HTMLParseError, LiveStreamError, RegexMatchError
@@ -468,6 +468,22 @@ def apply_signature(config_args: Dict, fmt: str, js: str) -> None:
468
  logger.debug(
469
  "finished descrambling signature for itag=%s", stream["itag"]
470
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  # 403 forbidden fix
472
  stream_manifest[i]["url"] = url + "&sig=" + signature
473
 
 
6
  from collections import OrderedDict
7
  from datetime import datetime
8
  from typing import Any, Dict, List, Optional, Tuple
9
+ from urllib.parse import parse_qs, parse_qsl, quote, unquote, urlencode, urlparse
10
 
11
  from pytube.cipher import Cipher
12
  from pytube.exceptions import HTMLParseError, LiveStreamError, RegexMatchError
 
468
  logger.debug(
469
  "finished descrambling signature for itag=%s", stream["itag"]
470
  )
471
+ query_params = parse_qs(urlparse(url).query)
472
+ if 'ratebypass' not in query_params.keys():
473
+ # Cipher n to get the updated value
474
+
475
+ initial_n = list(query_params['n'][0])
476
+ new_n = cipher.calculate_n(initial_n)
477
+ query_params['n'][0] = new_n
478
+
479
+ # Update the value
480
+ parsed = urlparse(url)
481
+ # The parsed query params are lists of a single element, convert to proper dicts.
482
+ query_params = {
483
+ k: v[0] for k,v in query_params.items()
484
+ }
485
+ url = f'{parsed.scheme}://{parsed.netloc}{parsed.path}?{urlencode(query_params)}'
486
+
487
  # 403 forbidden fix
488
  stream_manifest[i]["url"] = url + "&sig=" + signature
489
 
pytube/helpers.py CHANGED
@@ -280,6 +280,24 @@ def uniqueify(duped_list: List) -> List:
280
  return result
281
 
282
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
  def create_mock_html_json(vid_id) -> Dict[str, Any]:
284
  """Generate a json.gz file with sample html responses.
285
 
@@ -311,6 +329,7 @@ def create_mock_html_json(vid_id) -> Dict[str, Any]:
311
  'vid_info_raw': yt.vid_info_raw
312
  }
313
 
 
314
  with gzip.open(gzip_filepath, 'wb') as f:
315
  f.write(json.dumps(html_data).encode('utf-8'))
316
 
 
280
  return result
281
 
282
 
283
+ def generate_all_html_json_mocks():
284
+ """Regenerate the video mock json files for all current test videos.
285
+
286
+ This should automatically output to the test/mocks directory.
287
+ """
288
+ test_vid_ids = [
289
+ '2lAe1cqCOXo',
290
+ '5YceQ8YqYMc',
291
+ 'hZpzr8TbF08',
292
+ 'irauhITDrsE',
293
+ 'm8uHb5jIGN8',
294
+ 'QRS8MkLhQmM',
295
+ 'WXxV9g7lsFE'
296
+ ]
297
+ for vid_id in test_vid_ids:
298
+ create_mock_html_json(vid_id)
299
+
300
+
301
  def create_mock_html_json(vid_id) -> Dict[str, Any]:
302
  """Generate a json.gz file with sample html responses.
303
 
 
329
  'vid_info_raw': yt.vid_info_raw
330
  }
331
 
332
+ logger.info(f'Outputing json.gz file to {gzip_filepath}')
333
  with gzip.open(gzip_filepath, 'wb') as f:
334
  f.write(json.dumps(html_data).encode('utf-8'))
335
 
pytube/parser.py CHANGED
@@ -69,12 +69,12 @@ def find_object_from_startpoint(html, start_point):
69
  A dict created from parsing the object.
70
  """
71
  html = html[start_point:]
72
- if html[0] != '{':
73
- raise HTMLParseError('Invalid start point.')
74
 
75
  # First letter MUST be a open brace, so we put that in the stack,
76
  # and skip the first character.
77
- stack = ['{']
78
  i = 1
79
 
80
  context_closers = {
@@ -132,3 +132,47 @@ def parse_for_object_from_startpoint(html, start_point):
132
  return ast.literal_eval(full_obj)
133
  except (ValueError, SyntaxError):
134
  raise HTMLParseError('Could not parse object.')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  A dict created from parsing the object.
70
  """
71
  html = html[start_point:]
72
+ if html[0] not in ['{','[']:
73
+ raise HTMLParseError(f'Invalid start point. Start of HTML:\n{html[:20]}')
74
 
75
  # First letter MUST be a open brace, so we put that in the stack,
76
  # and skip the first character.
77
+ stack = [html[0]]
78
  i = 1
79
 
80
  context_closers = {
 
132
  return ast.literal_eval(full_obj)
133
  except (ValueError, SyntaxError):
134
  raise HTMLParseError('Could not parse object.')
135
+
136
+
137
+ def throttling_array_split(js_array):
138
+ """Parses the throttling array into a python list of strings.
139
+
140
+ Expects input to begin with `[` and close with `]`.
141
+
142
+ :param str js_array:
143
+ The javascript array, as a string.
144
+ :rtype: list:
145
+ :returns:
146
+ A list of strings representing splits on `,` in the throttling array.
147
+ """
148
+ results = []
149
+ curr_substring = js_array[1:]
150
+
151
+ comma_regex = re.compile(r",")
152
+ func_regex = re.compile(r"function\([^)]+\)")
153
+
154
+ while len(curr_substring) > 0:
155
+ if curr_substring.startswith('function'):
156
+ # Handle functions separately. These can contain commas
157
+ match = func_regex.search(curr_substring)
158
+ match_start, match_end = match.span()
159
+
160
+ function_text = find_object_from_startpoint(curr_substring, match.span()[1])
161
+ full_function_def = curr_substring[:match_end + len(function_text)]
162
+ results.append(full_function_def)
163
+ curr_substring = curr_substring[len(full_function_def) + 1:]
164
+ else:
165
+ match = comma_regex.search(curr_substring)
166
+
167
+ # Try-catch to capture end of array
168
+ try:
169
+ match_start, match_end = match.span()
170
+ except AttributeError:
171
+ match_start = len(curr_substring) - 1
172
+ match_end = match_start + 1
173
+
174
+ curr_el = curr_substring[:match_start]
175
+ results.append(curr_el)
176
+ curr_substring = curr_substring[match_end:]
177
+
178
+ return results
tests/mocks/yt-video-2lAe1cqCOXo-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-2lAe1cqCOXo-html.json.gz and b/tests/mocks/yt-video-2lAe1cqCOXo-html.json.gz differ
 
tests/mocks/yt-video-5YceQ8YqYMc-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-5YceQ8YqYMc-html.json.gz and b/tests/mocks/yt-video-5YceQ8YqYMc-html.json.gz differ
 
tests/mocks/yt-video-QRS8MkLhQmM-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-QRS8MkLhQmM-html.json.gz and b/tests/mocks/yt-video-QRS8MkLhQmM-html.json.gz differ
 
tests/mocks/yt-video-WXxV9g7lsFE-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-WXxV9g7lsFE-html.json.gz and b/tests/mocks/yt-video-WXxV9g7lsFE-html.json.gz differ
 
tests/mocks/yt-video-hZpzr8TbF08-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-hZpzr8TbF08-html.json.gz and b/tests/mocks/yt-video-hZpzr8TbF08-html.json.gz differ
 
tests/mocks/yt-video-irauhITDrsE-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-irauhITDrsE-html.json.gz and b/tests/mocks/yt-video-irauhITDrsE-html.json.gz differ
 
tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz CHANGED
Binary files a/tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz and b/tests/mocks/yt-video-m8uHb5jIGN8-html.json.gz differ
 
tests/test_cipher.py CHANGED
@@ -27,3 +27,53 @@ def test_reverse():
27
  def test_splice():
28
  assert cipher.splice([1, 2, 3, 4], 2) == [3, 4]
29
  assert cipher.splice([1, 2, 3, 4], 1) == [2, 3, 4]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def test_splice():
28
  assert cipher.splice([1, 2, 3, 4], 2) == [3, 4]
29
  assert cipher.splice([1, 2, 3, 4], 1) == [2, 3, 4]
30
+
31
+
32
+ def test_throttling_reverse():
33
+ a = [1, 2, 3, 4]
34
+ cipher.throttling_reverse(a)
35
+ assert a == [4, 3, 2, 1]
36
+
37
+
38
+ def test_throttling_push():
39
+ a = [1, 2, 3, 4]
40
+ cipher.throttling_push(a, 5)
41
+ assert a == [1, 2, 3, 4, 5]
42
+
43
+
44
+ def test_throttling_unshift():
45
+ a = [1, 2, 3, 4]
46
+ cipher.throttling_unshift(a, 2)
47
+ assert a == [3, 4, 1, 2]
48
+
49
+
50
+ def test_throttling_nested_splice():
51
+ a = [1, 2, 3, 4]
52
+ cipher.throttling_nested_splice(a, 2)
53
+ assert a == [3, 2, 1, 4]
54
+ cipher.throttling_nested_splice(a, 0)
55
+ assert a == [3, 2, 1, 4]
56
+
57
+
58
+ def test_throttling_prepend():
59
+ a = [1, 2, 3, 4]
60
+ cipher.throttling_prepend(a, 1)
61
+ assert a == [4, 1, 2, 3]
62
+ a = [1, 2, 3, 4]
63
+ cipher.throttling_prepend(a, 2)
64
+ assert a == [3, 4, 1, 2]
65
+
66
+
67
+ def test_throttling_swap():
68
+ a = [1, 2, 3, 4]
69
+ cipher.throttling_swap(a, 3)
70
+ assert a == [4, 2, 3, 1]
71
+
72
+
73
+ def test_js_splice():
74
+ mapping = {
75
+
76
+ }
77
+ for args, result in mapping.items():
78
+ a = [1, 2, 3, 4]
79
+ assert cipher.js_splice(a, *args) == result
tests/test_extract.py CHANGED
@@ -1,6 +1,7 @@
1
  """Unit tests for the :module:`extract <extract>` module."""
2
  from datetime import datetime
3
  import pytest
 
4
 
5
  from pytube import extract
6
  from pytube.exceptions import RegexMatchError
@@ -31,10 +32,11 @@ def test_info_url_age_restricted(cipher_signature):
31
 
32
  def test_js_url(cipher_signature):
33
  expected = (
34
- "https://youtube.com/s/player/9b65e980/player_ias.vflset/en_US/base.js"
35
  )
36
  result = extract.js_url(cipher_signature.watch_html)
37
- assert expected == result
 
38
 
39
 
40
  def test_age_restricted(age_restricted):
@@ -90,12 +92,6 @@ def test_get_ytplayer_js_with_no_match_should_error():
90
  extract.get_ytplayer_js("")
91
 
92
 
93
- def test_signature_cipher_does_not_error(stream_dict):
94
- config_args = extract.get_ytplayer_config(stream_dict)['args']
95
- extract.apply_descrambler(config_args, "url_encoded_fmt_stream_map")
96
- assert "s" in config_args["url_encoded_fmt_stream_map"][0].keys()
97
-
98
-
99
  def test_initial_data_missing():
100
  with pytest.raises(RegexMatchError):
101
  extract.initial_data('')
 
1
  """Unit tests for the :module:`extract <extract>` module."""
2
  from datetime import datetime
3
  import pytest
4
+ import re
5
 
6
  from pytube import extract
7
  from pytube.exceptions import RegexMatchError
 
32
 
33
  def test_js_url(cipher_signature):
34
  expected = (
35
+ r"https://youtube.com/s/player/([\w\d]+)/player_ias.vflset/en_US/base.js"
36
  )
37
  result = extract.js_url(cipher_signature.watch_html)
38
+ match = re.search(expected, result)
39
+ assert match is not None
40
 
41
 
42
  def test_age_restricted(age_restricted):
 
92
  extract.get_ytplayer_js("")
93
 
94
 
 
 
 
 
 
 
95
  def test_initial_data_missing():
96
  with pytest.raises(RegexMatchError):
97
  extract.initial_data('')
tests/test_streams.py CHANGED
@@ -57,7 +57,7 @@ def test_title(cipher_signature):
57
 
58
 
59
  def test_expiration(cipher_signature):
60
- assert cipher_signature.streams[0].expiration == datetime(2020, 10, 30, 5, 39, 41)
61
 
62
 
63
  def test_caption_tracks(presigned_video):
@@ -97,7 +97,15 @@ def test_description(cipher_signature):
97
 
98
 
99
  def test_rating(cipher_signature):
100
- assert cipher_signature.rating == 2.0860765
 
 
 
 
 
 
 
 
101
 
102
 
103
  def test_length(cipher_signature):
 
57
 
58
 
59
  def test_expiration(cipher_signature):
60
+ assert cipher_signature.streams[0].expiration >= datetime(2020, 10, 30, 5, 39, 41)
61
 
62
 
63
  def test_caption_tracks(presigned_video):
 
97
 
98
 
99
  def test_rating(cipher_signature):
100
+ """Test the rating value of a YouTube object.
101
+
102
+ This changes each time we rebuild the json files, so we want to use
103
+ an estimate of where it will be. The two values seen to make this
104
+ estimate were 2.073431 and 2.0860765. This represents a range of
105
+ ~0.007 below and ~0.006 above 2.08. Allowing for up to 0.02 in either
106
+ direction should provide a steady indicator of correctness.
107
+ """
108
+ assert abs(cipher_signature.rating - 2.08) < 0.02
109
 
110
 
111
  def test_length(cipher_signature):