Merge pull request #235 from family-guy/video-metadata-#195
Browse files
- pytube/__main__.py +37 -0
- pytube/compat.py +3 -1
- pytube/extract.py +26 -0
- tests/test_extract.py +5 -0
pytube/__main__.py
CHANGED
@@ -117,6 +117,7 @@ class YouTube(object):
|
|
117 |
self.watch_html,
|
118 |
)['args']
|
119 |
|
|
|
120 |
# https://github.com/nficano/pytube/issues/165
|
121 |
stream_maps = ['url_encoded_fmt_stream_map']
|
122 |
if 'adaptive_fmts' in self.player_config_args:
|
@@ -252,6 +253,42 @@ class YouTube(object):
|
|
252 |
"""
|
253 |
return self.player_config_args['title']
|
254 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
255 |
def register_on_progress_callback(self, func):
|
256 |
"""Register a download progress callback function post initialization.
|
257 |
|
|
|
117 |
self.watch_html,
|
118 |
)['args']
|
119 |
|
120 |
+
self.vid_descr = extract.get_vid_descr(self.watch_html)
|
121 |
# https://github.com/nficano/pytube/issues/165
|
122 |
stream_maps = ['url_encoded_fmt_stream_map']
|
123 |
if 'adaptive_fmts' in self.player_config_args:
|
|
|
253 |
"""
|
254 |
return self.player_config_args['title']
|
255 |
|
256 |
+
@property
def description(self):
    """Get the video description.

    Returns the value stored in ``self.vid_descr``.

    :rtype: str

    """
    return self.vid_descr
|
264 |
+
|
265 |
+
@property
def rating(self):
    """Get the video average rating.

    The value is returned exactly as found under the ``avg_rating`` key
    of ``self.player_config_args``; no conversion is applied.

    :rtype: str

    """
    return self.player_config_args['avg_rating']
|
273 |
+
|
274 |
+
@property
def length(self):
    """Get the video length in seconds.

    The value is returned exactly as found under the ``length_seconds``
    key of ``self.player_config_args``; no conversion is applied.

    :rtype: str

    """
    return self.player_config_args['length_seconds']
|
282 |
+
|
283 |
+
@property
def views(self):
    """Get the number of times the video has been viewed.

    The value is returned exactly as found under the ``view_count`` key
    of ``self.player_config_args``; no conversion is applied.

    :rtype: str

    """
    return self.player_config_args['view_count']
|
291 |
+
|
292 |
def register_on_progress_callback(self, func):
|
293 |
"""Register a download progress callback function post initialization.
|
294 |
|
pytube/compat.py
CHANGED
@@ -4,12 +4,14 @@
|
|
4 |
"""Python 2/3 compatibility support."""
|
5 |
import sys
|
6 |
|
|
|
7 |
PY2 = sys.version_info[0] == 2
|
8 |
PY3 = sys.version_info[0] == 3
|
9 |
PY33 = sys.version_info[0:2] >= (3, 3)
|
10 |
|
11 |
-
|
12 |
if PY2:
|
|
|
|
|
13 |
import urllib2
|
14 |
from urllib import urlencode
|
15 |
from urllib2 import URLError
|
|
|
4 |
"""Python 2/3 compatibility support."""
|
5 |
import sys
|
6 |
|
7 |
+
|
8 |
PY2 = sys.version_info[0] == 2
|
9 |
PY3 = sys.version_info[0] == 3
|
10 |
PY33 = sys.version_info[0:2] >= (3, 3)
|
11 |
|
|
|
12 |
if PY2:
|
13 |
+
reload(sys)
|
14 |
+
sys.setdefaultencoding('utf8')
|
15 |
import urllib2
|
16 |
from urllib import urlencode
|
17 |
from urllib2 import URLError
|
pytube/extract.py
CHANGED
@@ -3,12 +3,32 @@
|
|
3 |
import json
|
4 |
from collections import OrderedDict
|
5 |
|
|
|
6 |
from pytube.compat import quote
|
7 |
from pytube.compat import urlencode
|
8 |
from pytube.exceptions import RegexMatchError
|
9 |
from pytube.helpers import regex_search
|
10 |
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
def is_age_restricted(watch_html):
|
13 |
"""Check if content is age restricted.
|
14 |
|
@@ -173,3 +193,9 @@ def get_ytplayer_config(html, age_restricted=False):
|
|
173 |
pattern = r';ytplayer\.config\s*=\s*({.*?});'
|
174 |
yt_player_config = regex_search(pattern, html, group=1)
|
175 |
return json.loads(yt_player_config)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import json
|
4 |
from collections import OrderedDict
|
5 |
|
6 |
+
from pytube.compat import HTMLParser
|
7 |
from pytube.compat import quote
|
8 |
from pytube.compat import urlencode
|
9 |
from pytube.exceptions import RegexMatchError
|
10 |
from pytube.helpers import regex_search
|
11 |
|
12 |
|
13 |
+
class PytubeHTMLParser(HTMLParser):
    """HTML parser that collects the text of the video description.

    Collection starts at a ``<p>`` start tag whose ``id`` attribute is
    ``eow-description`` and stops at the next ``</p>`` end tag. Every text
    node seen in between is appended to ``vid_descr``. Tags nested inside
    the paragraph add no characters of their own, so the text on either
    side of a nested tag is joined with no separator.
    """

    # True while the parser is inside the description paragraph.
    in_vid_descr = False
    # Description text accumulated so far.
    vid_descr = ''

    def handle_starttag(self, tag, attrs):
        """Begin collecting when the description paragraph opens.

        :param str tag:
            Name of the tag being opened.
        :param list attrs:
            ``(name, value)`` pairs for the attributes of the tag.
        """
        if tag != 'p':
            return
        # any() stops at the first matching attribute.
        if any(
            name == 'id' and value == 'eow-description'
            for name, value in attrs
        ):
            self.in_vid_descr = True

    def handle_endtag(self, tag):
        """Stop collecting when the description paragraph closes.

        :param str tag:
            Name of the tag being closed.
        """
        if tag == 'p' and self.in_vid_descr:
            self.in_vid_descr = False

    def handle_data(self, data):
        """Append a text node to the description while inside it.

        :param str data:
            Text found between tags.
        """
        if self.in_vid_descr:
            # Rebinding creates an instance attribute, so the class-level
            # default stays an empty string for other instances.
            self.vid_descr += data
|
30 |
+
|
31 |
+
|
32 |
def is_age_restricted(watch_html):
|
33 |
"""Check if content is age restricted.
|
34 |
|
|
|
193 |
pattern = r';ytplayer\.config\s*=\s*({.*?});'
|
194 |
yt_player_config = regex_search(pattern, html, group=1)
|
195 |
return json.loads(yt_player_config)
|
196 |
+
|
197 |
+
|
198 |
+
def get_vid_descr(html):
    """Extract the video description from a page's html.

    Feeds ``html`` to a fresh :class:`PytubeHTMLParser` and returns the
    text that the parser accumulated in its ``vid_descr`` attribute.

    :param str html:
        The html contents to search for the description.
    :rtype: str
    :returns:
        The description text collected by the parser.
    """
    parser = PytubeHTMLParser()
    parser.feed(html)
    return parser.vid_descr
|
tests/test_extract.py
CHANGED
@@ -43,3 +43,8 @@ def test_age_restricted(age_restricted):
|
|
43 |
|
44 |
def test_non_age_restricted(cipher_signature):
|
45 |
assert not extract.is_age_restricted(cipher_signature.watch_html)
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
def test_non_age_restricted(cipher_signature):
|
45 |
assert not extract.is_age_restricted(cipher_signature.watch_html)
|
46 |
+
|
47 |
+
|
48 |
+
def test_get_vid_desc(cipher_signature):
    """Check the description extracted from the fixture's watch page.

    The expected value is written as adjacent string literals, which
    Python joins into one string with nothing in between. This matches
    the extracted text, where consecutive entries run together.
    """
    expected = (
        "PSY - DADDY(feat. CL of 2NE1) M/V @ https://youtu.be/FrG4TEcSuRg"
        "PSY - 나팔바지(NAPAL BAJI) M/V @ https://youtu.be/tF27TNC_4pc"
        "PSY - 7TH ALBUM '칠집싸이다' on iTunes @ "
        "http://smarturl.it/PSY_7THALBUM"
        "PSY - GANGNAM STYLE(강남스타일) on iTunes @ "
        "http://smarturl.it/PsyGangnam"
        "#PSY #싸이 #GANGNAMSTYLE #강남스타일"
        "More about PSY@"
        "http://www.psypark.com/"
        "http://www.youtube.com/officialpsy"
        "http://www.facebook.com/officialpsy"
        "http://twitter.com/psy_oppa"
        "https://www.instagram.com/42psy42"
        "http://iTunes.com/PSY"
        "http://sptfy.com/PSY"
        "http://weibo.com/psyoppa"
        "http://twitter.com/ygent_official"
    )
    assert extract.get_vid_descr(cipher_signature.watch_html) == expected
|