"""Module for interacting with YouTube search.""" # Native python imports import logging # Local imports from pytube import YouTube from pytube.innertube import InnerTube logger = logging.getLogger(__name__) class Search: def __init__(self, query): """Initialize Search object. :param str query: Search query provided by the user. """ self.query = query self._innertube_client = InnerTube(client='WEB') # The first search, without a continuation, is structured differently # and contains completion suggestions, so we must store this separately self._initial_results = None self._results = None self._completion_suggestions = None # Used for keeping track of query continuations so that new results # are always returned when get_next_results() is called self._current_continuation = None @property def completion_suggestions(self): """Return query autocompletion suggestions for the query. :rtype: list :returns: A list of autocomplete suggestions provided by YouTube for the query. """ if self._completion_suggestions: return self._completion_suggestions if self.results: self._completion_suggestions = self._initial_results['refinements'] return self._completion_suggestions @property def results(self): """Return search results. On first call, will generate and return the first set of results. Additional results can be generated using ``.get_next_results()``. :rtype: list :returns: A list of YouTube objects. """ if self._results: return self._results videos, continuation = self.fetch_and_parse() self._results = videos self._current_continuation = continuation return self._results def get_next_results(self): """Use the stored continuation string to fetch the next set of results. This method does not return the results, but instead updates the results property. """ if self._current_continuation: videos, continuation = self.fetch_and_parse(self._current_continuation) self._results.extend(videos) self._current_continuation = continuation else: raise IndexError def fetch_and_parse(self, continuation=None): """Fetch from the innertube API and parse the results. :param str continuation: Continuation string for fetching results. :rtype: tuple :returns: A tuple of a list of YouTube objects and a continuation string. """ # Begin by executing the query and identifying the relevant sections # of the results raw_results = self.fetch_query(continuation) # Initial result is handled by try block, continuations by except block try: sections = raw_results['contents']['twoColumnSearchResultsRenderer'][ 'primaryContents']['sectionListRenderer']['contents'] except KeyError: sections = raw_results['onResponseReceivedCommands'][0][ 'appendContinuationItemsAction']['continuationItems'] item_renderer = None continuation_renderer = None for s in sections: if 'itemSectionRenderer' in s: item_renderer = s['itemSectionRenderer'] if 'continuationItemRenderer' in s: continuation_renderer = s['continuationItemRenderer'] # If the continuationItemRenderer doesn't exist, assume no further results if continuation_renderer: next_continuation = continuation_renderer['continuationEndpoint'][ 'continuationCommand']['token'] else: next_continuation = None # If the itemSectionRenderer doesn't exist, assume no results. if item_renderer: videos = [] raw_video_list = item_renderer['contents'] for video_details in raw_video_list: # Skip over ads if video_details.get('searchPyvRenderer', {}).get('ads', None): continue # Skip "recommended" type videos e.g. "people also watched" and "popular X" # that break up the search results if 'shelfRenderer' in video_details: continue # Skip auto-generated "mix" playlist results if 'radioRenderer' in video_details: continue # Skip playlist results if 'playlistRenderer' in video_details: continue # Skip channel results if 'channelRenderer' in video_details: continue # Skip 'people also searched for' results if 'horizontalCardListRenderer' in video_details: continue # Can't seem to reproduce, probably related to typo fix suggestions if 'didYouMeanRenderer' in video_details: continue # Seems to be the renderer used for the image shown on a no results page if 'backgroundPromoRenderer' in video_details: continue if 'videoRenderer' not in video_details: logger.warn('Unexpected renderer encountered.') logger.warn(f'Renderer name: {video_details.keys()}') logger.warn(f'Search term: {self.query}') logger.warn( 'Please open an issue at ' 'https://github.com/pytube/pytube/issues ' 'and provide this log output.' ) continue # Extract relevant video information from the details. # Some of this can be used to pre-populate attributes of the # YouTube object. vid_renderer = video_details['videoRenderer'] vid_id = vid_renderer['videoId'] vid_url = f'https://www.youtube.com/watch?v={vid_id}' vid_title = vid_renderer['title']['runs'][0]['text'] vid_channel_name = vid_renderer['ownerText']['runs'][0]['text'] vid_channel_uri = vid_renderer['ownerText']['runs'][0][ 'navigationEndpoint']['commandMetadata']['webCommandMetadata']['url'] # Livestreams have "runs", non-livestreams have "simpleText", # and scheduled releases do not have 'viewCountText' if 'viewCountText' in vid_renderer: if 'runs' in vid_renderer['viewCountText']: vid_view_count_text = vid_renderer['viewCountText']['runs'][0]['text'] else: vid_view_count_text = vid_renderer['viewCountText']['simpleText'] # Strip ' views' text, then remove commas stripped_text = vid_view_count_text.split()[0].replace(',','') if stripped_text == 'No': vid_view_count = 0 else: vid_view_count = int(stripped_text) else: vid_view_count = 0 if 'lengthText' in vid_renderer: vid_length = vid_renderer['lengthText']['simpleText'] else: vid_length = None vid_metadata = { 'id': vid_id, 'url': vid_url, 'title': vid_title, 'channel_name': vid_channel_name, 'channel_url': vid_channel_uri, 'view_count': vid_view_count, 'length': vid_length } # Construct YouTube object from metadata and append to results vid = YouTube(vid_metadata['url']) vid.author = vid_metadata['channel_name'] vid.title = vid_metadata['title'] videos.append(vid) else: videos = None return videos, next_continuation def fetch_query(self, continuation=None): """Fetch raw results from the innertube API. :param str continuation: Continuation string for fetching results. :rtype: dict :returns: The raw json object returned by the innertube API. """ query_results = self._innertube_client.search(self.query, continuation) if not self._initial_results: self._initial_results = query_results return query_results # noqa:R504