Kinshuk Vasisht
committed on
Fix regex to find throttling function name (#1282)
Browse files
* Generalize regex to find throttle function name
* Extend tests and fixtures for throttling name
* Rename mock asset to follow similar naming style
- pytube/cipher.py +2 -2
- tests/conftest.py +13 -9
- tests/mocks/{base.js.gz → base.js-2022-02-04.gz} +0 -0
- tests/mocks/base.js-2022-04-15.gz +0 -0
- tests/test_cipher.py +21 -8
pytube/cipher.py
CHANGED
@@ -270,7 +270,7 @@ def get_throttling_function_name(js: str) -> str:
|
|
270 |
# Bpa.length || iha("")) }};
|
271 |
# In the above case, `iha` is the relevant function name
|
272 |
r'a\.[a-zA-Z]\s*&&\s*\([a-z]\s*=\s*a\.get\("n"\)\)\s*&&\s*'
|
273 |
-
r'\([a-z]\s*=\s*([a-zA-Z0-9$]
|
274 |
]
|
275 |
logger.debug('Finding throttling function name')
|
276 |
for pattern in function_patterns:
|
@@ -285,7 +285,7 @@ def get_throttling_function_name(js: str) -> str:
|
|
285 |
idx = idx.strip("[]")
|
286 |
array = re.search(
|
287 |
r'var {nfunc}\s*=\s*(\[.+?\]);'.format(
|
288 |
-
nfunc=function_match.group(1)),
|
289 |
js
|
290 |
)
|
291 |
if array:
|
|
|
270 |
# Bpa.length || iha("")) }};
|
271 |
# In the above case, `iha` is the relevant function name
|
272 |
r'a\.[a-zA-Z]\s*&&\s*\([a-z]\s*=\s*a\.get\("n"\)\)\s*&&\s*'
|
273 |
+
r'\([a-z]\s*=\s*([a-zA-Z0-9$]+)(\[\d+\])?\([a-z]\)',
|
274 |
]
|
275 |
logger.debug('Finding throttling function name')
|
276 |
for pattern in function_patterns:
|
|
|
285 |
idx = idx.strip("[]")
|
286 |
array = re.search(
|
287 |
r'var {nfunc}\s*=\s*(\[.+?\]);'.format(
|
288 |
+
nfunc=re.escape(function_match.group(1))),
|
289 |
js
|
290 |
)
|
291 |
if array:
|
tests/conftest.py
CHANGED
@@ -150,13 +150,17 @@ def channel_videos_html():
|
|
150 |
|
151 |
@pytest.fixture
|
152 |
def base_js():
|
153 |
-
"""Youtube base.js retrieved on 2022-02-04
|
154 |
-
https://www.youtube.com/watch?v=vmzxpUsN0uA
|
|
|
155 |
"""
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
|
|
|
|
|
|
|
150 |
|
151 |
@pytest.fixture
|
152 |
def base_js():
|
153 |
+
"""Youtube base.js files retrieved on 2022-02-04 and 2022-04-15
|
154 |
+
from https://www.youtube.com/watch?v=vmzxpUsN0uA and
|
155 |
+
https://www.youtube.com/watch?v=Y4-GSFKZmEg respectively
|
156 |
"""
|
157 |
+
base_js_files = []
|
158 |
+
for file in ["base.js-2022-02-04.gz", "base.js-2022-04-15.gz"]:
|
159 |
+
file_path = os.path.join(
|
160 |
+
os.path.dirname(os.path.realpath(__file__)),
|
161 |
+
"mocks",
|
162 |
+
file,
|
163 |
+
)
|
164 |
+
with gzip.open(file_path, 'rb') as f:
|
165 |
+
base_js_files.append(f.read().decode('utf-8'))
|
166 |
+
return base_js_files
|
tests/mocks/{base.js.gz → base.js-2022-02-04.gz}
RENAMED
File without changes
|
tests/mocks/base.js-2022-04-15.gz
ADDED
Binary file (581 kB). View file
|
|
tests/test_cipher.py
CHANGED
@@ -80,11 +80,24 @@ def test_js_splice():
|
|
80 |
|
81 |
|
82 |
def test_get_throttling_function_name(base_js):
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
|
81 |
|
82 |
def test_get_throttling_function_name(base_js):
|
83 |
+
base_js_code_fragments = [
|
84 |
+
# Values expected as of 2022/02/04:
|
85 |
+
{
|
86 |
+
'raw_var' : r'var Apa=[hha]',
|
87 |
+
'raw_code': r'a.url="";a.C&&(b=a.get("n"))&&(b=Apa[0](b),a.set("n",b),'\
|
88 |
+
r'Apa.length||hha(""))}};',
|
89 |
+
'nfunc_name': 'hha'
|
90 |
+
},
|
91 |
+
# Values expected as of 2022/04/15:
|
92 |
+
{
|
93 |
+
'raw_var' : r'var $x=[uq]',
|
94 |
+
'raw_code': r'a.url="";a.D&&(b=a.get("n"))&&(b=$x[0](b),a.set("n",b),'\
|
95 |
+
r'$x.length||uq(""))',
|
96 |
+
'nfunc_name': 'uq'
|
97 |
+
}
|
98 |
+
]
|
99 |
+
for code_fragment, base_js_file in zip(base_js_code_fragments, base_js):
|
100 |
+
assert code_fragment['raw_var'] in base_js_file
|
101 |
+
assert code_fragment['raw_code'] in base_js_file
|
102 |
+
func_name = cipher.get_throttling_function_name(base_js_file)
|
103 |
+
assert func_name == code_fragment['nfunc_name']
|