|
"""Module for interacting with YouTube search.""" |
|
|
|
import logging |
|
|
|
|
|
from pytube import YouTube |
|
from pytube.innertube import InnerTube |
|
|
|
|
|
logger = logging.getLogger(__name__)


class Search:
    """Run a YouTube search query and expose the hits as ``YouTube`` objects.

    The first page of results is fetched lazily through the ``results``
    property; further pages are appended with ``get_next_results()``.
    """

    # Renderer keys that are skipped silently while parsing result items.
    # Only items carrying a ``videoRenderer`` are turned into YouTube objects.
    _IGNORED_RENDERERS = frozenset((
        'shelfRenderer',
        'radioRenderer',
        'playlistRenderer',
        'channelRenderer',
        'horizontalCardListRenderer',
        'didYouMeanRenderer',
        'backgroundPromoRenderer',
    ))

    def __init__(self, query):
        """Initialize Search object.

        :param str query:
            Search query provided by the user.
        """
        self.query = query
        self._innertube_client = InnerTube(client='WEB')

        # Raw response of the first request made through ``fetch_query``.
        self._initial_results = None

        # Parsed results and cached autocomplete suggestions.
        self._results = None
        self._completion_suggestions = None

        # Token used by ``get_next_results`` to request the following page.
        self._current_continuation = None

    @property
    def completion_suggestions(self):
        """Return query autocompletion suggestions for the query.

        :rtype: list
        :returns:
            The value stored under ``'refinements'`` in the first response,
            or ``None`` when the search produced no results or the response
            has no ``'refinements'`` entry.
        """
        if self._completion_suggestions:
            return self._completion_suggestions
        # Reading ``results`` triggers the initial request when needed.
        if self.results:
            # ``.get`` so that a response without a 'refinements' key yields
            # None instead of raising KeyError.
            self._completion_suggestions = self._initial_results.get(
                'refinements'
            )
        return self._completion_suggestions

    @property
    def results(self):
        """Return search results.

        On first call, will generate and return the first set of results.
        Additional results can be generated using ``.get_next_results()``.

        An empty or missing result list is not treated as cached, so the
        query is sent again on the next access in that case.

        :rtype: list
        :returns:
            A list of YouTube objects.
        """
        if self._results:
            return self._results

        videos, continuation = self.fetch_and_parse()
        self._results = videos
        self._current_continuation = continuation
        return self._results

    def get_next_results(self):
        """Use the stored continuation string to fetch the next set of results.

        This method does not return the results, but instead updates the
        results property.

        :raises IndexError:
            If no continuation string is stored, i.e. no search has been run
            yet or the last fetched page did not provide one.
        """
        if not self._current_continuation:
            raise IndexError(
                'No continuation available to fetch further results.'
            )

        videos, continuation = self.fetch_and_parse(self._current_continuation)
        # ``videos`` is None when the page has no item section; extending
        # with it (or extending a still-unset result list) would raise.
        if videos:
            if self._results is None:
                self._results = []
            self._results.extend(videos)
        self._current_continuation = continuation

    def fetch_and_parse(self, continuation=None):
        """Fetch from the innertube API and parse the results.

        :param str continuation:
            Continuation string for fetching results.
        :rtype: tuple
        :returns:
            A tuple of a list of YouTube objects and a continuation string.
            The list is ``None`` when the response contains no
            ``itemSectionRenderer``; the continuation is ``None`` when the
            response contains no ``continuationItemRenderer``.
        """
        raw_results = self.fetch_query(continuation)

        # Two response layouts are handled: the nested 'contents' layout is
        # tried first, falling back to the 'onResponseReceivedCommands' one.
        try:
            sections = raw_results['contents']['twoColumnSearchResultsRenderer'][
                'primaryContents']['sectionListRenderer']['contents']
        except KeyError:
            sections = raw_results['onResponseReceivedCommands'][0][
                'appendContinuationItemsAction']['continuationItems']

        # When several sections of the same kind exist, the last one wins.
        item_renderer = None
        continuation_renderer = None
        for section in sections:
            if 'itemSectionRenderer' in section:
                item_renderer = section['itemSectionRenderer']
            if 'continuationItemRenderer' in section:
                continuation_renderer = section['continuationItemRenderer']

        if continuation_renderer:
            next_continuation = continuation_renderer['continuationEndpoint'][
                'continuationCommand']['token']
        else:
            next_continuation = None

        if not item_renderer:
            return None, next_continuation

        videos = []
        for video_details in item_renderer['contents']:
            # Skip entries flagged as ads.
            if video_details.get('searchPyvRenderer', {}).get('ads', None):
                continue

            # Skip entries carrying any of the known non-video renderers.
            if not self._IGNORED_RENDERERS.isdisjoint(video_details):
                continue

            if 'videoRenderer' not in video_details:
                logger.warning('Unexpected renderer encountered.')
                logger.warning('Renderer name: %s', video_details.keys())
                logger.warning('Search term: %s', self.query)
                logger.warning(
                    'Please open an issue at '
                    'https://github.com/pytube/pytube/issues '
                    'and provide this log output.'
                )
                continue

            videos.append(self._build_video(video_details['videoRenderer']))

        return videos, next_continuation

    @staticmethod
    def _build_video(vid_renderer):
        """Create a YouTube object from a single ``videoRenderer`` dict.

        :param dict vid_renderer:
            The value stored under ``'videoRenderer'`` in a result item.
        :rtype: YouTube
        :returns:
            A YouTube object whose ``author`` and ``title`` are pre-filled
            from the search result.
        """
        vid_id = vid_renderer['videoId']
        vid_title = vid_renderer['title']['runs'][0]['text']
        vid_channel_name = vid_renderer['ownerText']['runs'][0]['text']

        vid = YouTube(f'https://www.youtube.com/watch?v={vid_id}')
        vid.author = vid_channel_name
        vid.title = vid_title
        return vid

    def fetch_query(self, continuation=None):
        """Fetch raw results from the innertube API.

        The first response obtained is also kept in ``_initial_results``.

        :param str continuation:
            Continuation string for fetching results.
        :rtype: dict
        :returns:
            The raw json object returned by the innertube API.
        """
        query_results = self._innertube_client.search(self.query, continuation)
        if not self._initial_results:
            self._initial_results = query_results
        return query_results
|
|