Vishnunarayan K I committed on
Commit
63c3954
·
unverified ·
2 Parent(s): 4030ef4 b8192f2

Merge pull request #235 from family-guy/video-metadata-#195

Browse files
pytube/__main__.py CHANGED
@@ -117,6 +117,7 @@ class YouTube(object):
117
  self.watch_html,
118
  )['args']
119
 
 
120
  # https://github.com/nficano/pytube/issues/165
121
  stream_maps = ['url_encoded_fmt_stream_map']
122
  if 'adaptive_fmts' in self.player_config_args:
@@ -252,6 +253,42 @@ class YouTube(object):
252
  """
253
  return self.player_config_args['title']
254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  def register_on_progress_callback(self, func):
256
  """Register a download progress callback function post initialization.
257
 
 
117
  self.watch_html,
118
  )['args']
119
 
120
+ self.vid_descr = extract.get_vid_descr(self.watch_html)
121
  # https://github.com/nficano/pytube/issues/165
122
  stream_maps = ['url_encoded_fmt_stream_map']
123
  if 'adaptive_fmts' in self.player_config_args:
 
253
  """
254
  return self.player_config_args['title']
255
 
256
@property
def description(self):
    """Return the video description stored on this instance.

    The value is read from ``self.vid_descr``.

    :rtype: str

    """
    description_text = self.vid_descr
    return description_text
264
+
265
@property
def rating(self):
    """Return the video's average rating.

    The value is looked up under the ``'avg_rating'`` key of
    ``self.player_config_args``; a missing key raises ``KeyError``.

    :rtype: str

    """
    config_args = self.player_config_args
    return config_args['avg_rating']
273
+
274
@property
def length(self):
    """Return the video length in seconds.

    The value is looked up under the ``'length_seconds'`` key of
    ``self.player_config_args``; a missing key raises ``KeyError``.

    :rtype: str

    """
    config_args = self.player_config_args
    return config_args['length_seconds']
282
+
283
@property
def views(self):
    """Return how many times the video has been viewed.

    The value is looked up under the ``'view_count'`` key of
    ``self.player_config_args``; a missing key raises ``KeyError``.

    :rtype: str

    """
    config_args = self.player_config_args
    return config_args['view_count']
291
+
292
  def register_on_progress_callback(self, func):
293
  """Register a download progress callback function post initialization.
294
 
pytube/compat.py CHANGED
@@ -4,12 +4,14 @@
4
  """Python 2/3 compatibility support."""
5
  import sys
6
 
 
7
  PY2 = sys.version_info[0] == 2
8
  PY3 = sys.version_info[0] == 3
9
  PY33 = sys.version_info[0:2] >= (3, 3)
10
 
11
-
12
  if PY2:
 
 
13
  import urllib2
14
  from urllib import urlencode
15
  from urllib2 import URLError
 
4
  """Python 2/3 compatibility support."""
5
  import sys
6
 
7
+
8
  PY2 = sys.version_info[0] == 2
9
  PY3 = sys.version_info[0] == 3
10
  PY33 = sys.version_info[0:2] >= (3, 3)
11
 
 
12
  if PY2:
13
+ reload(sys)
14
+ sys.setdefaultencoding('utf8')
15
  import urllib2
16
  from urllib import urlencode
17
  from urllib2 import URLError
pytube/extract.py CHANGED
@@ -3,12 +3,32 @@
3
  import json
4
  from collections import OrderedDict
5
 
 
6
  from pytube.compat import quote
7
  from pytube.compat import urlencode
8
  from pytube.exceptions import RegexMatchError
9
  from pytube.helpers import regex_search
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def is_age_restricted(watch_html):
13
  """Check if content is age restricted.
14
 
@@ -173,3 +193,9 @@ def get_ytplayer_config(html, age_restricted=False):
173
  pattern = r';ytplayer\.config\s*=\s*({.*?});'
174
  yt_player_config = regex_search(pattern, html, group=1)
175
  return json.loads(yt_player_config)
 
 
 
 
 
 
 
3
  import json
4
  from collections import OrderedDict
5
 
6
+ from pytube.compat import HTMLParser
7
  from pytube.compat import quote
8
  from pytube.compat import urlencode
9
  from pytube.exceptions import RegexMatchError
10
  from pytube.helpers import regex_search
11
 
12
 
13
class PytubeHTMLParser(HTMLParser):
    """HTML parser that collects the text of ``<p id="eow-description">``.

    Feed it HTML with :meth:`feed`; the text found between the opening
    ``<p id="eow-description">`` tag and the next closing ``</p>`` is
    accumulated in :attr:`vid_descr`. Nested tags inside the paragraph
    contribute only their text; the tags themselves are dropped.
    """

    # Class-level defaults so the attributes always exist on an instance.
    in_vid_descr = False
    vid_descr = ''

    def reset(self):
        """Reset the parser, discarding any description collected so far.

        Without this override, calling ``reset()`` and feeding a second
        document would append the new description to the previous one.
        """
        # Explicit base-class call rather than super(): the file also
        # targets Python 2, where HTMLParser may be an old-style class.
        HTMLParser.reset(self)
        self.in_vid_descr = False
        self.vid_descr = ''

    def handle_starttag(self, tag, attrs):
        """Start collecting when ``<p id="eow-description">`` opens.

        :param str tag: lower-cased tag name.
        :param list attrs: ``(name, value)`` pairs for the tag.
        """
        if tag != 'p':
            return
        if any(
            name == 'id' and value == 'eow-description'
            for name, value in attrs
        ):
            self.in_vid_descr = True

    def handle_endtag(self, tag):
        """Stop collecting at the first ``</p>`` after collection began.

        :param str tag: lower-cased tag name.
        """
        if tag == 'p':
            self.in_vid_descr = False

    def handle_data(self, data):
        """Append text to the description while inside the paragraph.

        :param str data: text content reported by the parser.
        """
        if self.in_vid_descr:
            self.vid_descr += data
30
+
31
+
32
  def is_age_restricted(watch_html):
33
  """Check if content is age restricted.
34
 
 
193
  pattern = r';ytplayer\.config\s*=\s*({.*?});'
194
  yt_player_config = regex_search(pattern, html, group=1)
195
  return json.loads(yt_player_config)
196
+
197
+
198
def get_vid_descr(html):
    """Extract the video description text from watch-page HTML.

    Feeds ``html`` to a fresh :class:`PytubeHTMLParser` and returns the
    text it accumulated in ``vid_descr``.

    :param str html:
        The HTML contents to parse.
    :rtype: str
    :returns:
        The collected description text (empty string if none was found).
    """
    parser = PytubeHTMLParser()
    parser.feed(html)
    description = parser.vid_descr
    return description
tests/test_extract.py CHANGED
@@ -43,3 +43,8 @@ def test_age_restricted(age_restricted):
43
 
44
  def test_non_age_restricted(cipher_signature):
45
  assert not extract.is_age_restricted(cipher_signature.watch_html)
 
 
 
 
 
 
43
 
44
  def test_non_age_restricted(cipher_signature):
45
  assert not extract.is_age_restricted(cipher_signature.watch_html)
46
+
47
+
48
def test_get_vid_desc(cipher_signature):
    """The description parsed from the fixture's watch HTML matches."""
    expected_description = "PSY - DADDY(feat. CL of 2NE1) M/V @ https://youtu.be/FrG4TEcSuRgPSY - 나팔바지(NAPAL BAJI) M/V @ https://youtu.be/tF27TNC_4pcPSY - 7TH ALBUM '칠집싸이다' on iTunes @ http://smarturl.it/PSY_7THALBUMPSY - GANGNAM STYLE(강남스타일) on iTunes @ http://smarturl.it/PsyGangnam#PSY #싸이 #GANGNAMSTYLE #강남스타일More about PSY@http://www.psypark.com/http://www.youtube.com/officialpsyhttp://www.facebook.com/officialpsyhttp://twitter.com/psy_oppahttps://www.instagram.com/42psy42http://iTunes.com/PSYhttp://sptfy.com/PSYhttp://weibo.com/psyoppahttp://twitter.com/ygent_official"  # noqa
    actual_description = extract.get_vid_descr(cipher_signature.watch_html)
    assert actual_description == expected_description