tubifier / pytube /innertube.py
wldmr's picture
app file
837fdb6
raw
history blame
No virus
11.7 kB
"""This module is designed to interact with the innertube API.
This module is NOT intended to be used directly by end users, as each of the
interfaces returns raw results. These should instead be parsed to extract
the useful information for the end user.
"""
# Native python imports
import json
import os
import pathlib
import time
from urllib import parse
# Local imports
from pytube import request
# YouTube on TV client secrets
_client_id = '861556708454-d6dlm3lh05idd8npek18k6be8ba3oc68.apps.googleusercontent.com'
_client_secret = 'SboVhoG9s0rNafixCSGGKXAT'
# Extracted API keys -- unclear what these are linked to.
_api_keys = [
'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
'AIzaSyCtkvNIR1HCEwzsqK6JuE6KqpyjusIRI30',
'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
'AIzaSyC8UYZpvA2eknNex0Pjid0_eTLJoDu6los',
'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw',
'AIzaSyDHQ9ipnphqTzDqZsbtd8_Ru4_kiKVQe2k'
]
_default_clients = {
'WEB': {
'context': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20200720.00.02'
}
},
'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
},
'ANDROID': {
'context': {
'client': {
'clientName': 'ANDROID',
'clientVersion': '16.20'
}
},
'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
},
'WEB_EMBED': {
'context': {
'client': {
'clientName': 'WEB',
'clientVersion': '2.20210721.00.00',
'clientScreen': 'EMBED'
}
},
'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
},
'ANDROID_EMBED': {
'context': {
'client': {
'clientName': 'ANDROID',
'clientVersion': '16.20',
'clientScreen': 'EMBED'
}
},
'api_key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
}
}
_token_timeout = 1800
_cache_dir = pathlib.Path(__file__).parent.resolve() / '__cache__'
_token_file = os.path.join(_cache_dir, 'tokens.json')
class InnerTube:
"""Object for interacting with the innertube API."""
def __init__(self, client='ANDROID', use_oauth=False, allow_cache=True):
"""Initialize an InnerTube object.
:param str client:
Client to use for the object.
Default to web because it returns the most playback types.
:param bool use_oauth:
Whether or not to authenticate to YouTube.
:param bool allow_cache:
Allows caching of oauth tokens on the machine.
"""
self.context = _default_clients[client]['context']
self.api_key = _default_clients[client]['api_key']
self.access_token = None
self.refresh_token = None
self.use_oauth = use_oauth
self.allow_cache = allow_cache
# Stored as epoch time
self.expires = None
# Try to load from file if specified
if self.use_oauth and self.allow_cache:
# Try to load from file if possible
if os.path.exists(_token_file):
with open(_token_file) as f:
data = json.load(f)
self.access_token = data['access_token']
self.refresh_token = data['refresh_token']
self.expires = data['expires']
self.refresh_bearer_token()
def cache_tokens(self):
"""Cache tokens to file if allowed."""
if not self.allow_cache:
return
data = {
'access_token': self.access_token,
'refresh_token': self.refresh_token,
'expires': self.expires
}
if not os.path.exists(_cache_dir):
os.mkdir(_cache_dir)
with open(_token_file, 'w') as f:
json.dump(data, f)
def refresh_bearer_token(self, force=False):
"""Refreshes the OAuth token if necessary.
:param bool force:
Force-refresh the bearer token.
"""
if not self.use_oauth:
return
# Skip refresh if it's not necessary and not forced
if self.expires > time.time() and not force:
return
# Subtracting 30 seconds is arbitrary to avoid potential time discrepencies
start_time = int(time.time() - 30)
data = {
'client_id': _client_id,
'client_secret': _client_secret,
'grant_type': 'refresh_token',
'refresh_token': self.refresh_token
}
response = request._execute_request(
'https://oauth2.googleapis.com/token',
'POST',
headers={
'Content-Type': 'application/json'
},
data=data
)
response_data = json.loads(response.read())
self.access_token = response_data['access_token']
self.expires = start_time + response_data['expires_in']
self.cache_tokens()
def fetch_bearer_token(self):
"""Fetch an OAuth token."""
# Subtracting 30 seconds is arbitrary to avoid potential time discrepencies
start_time = int(time.time() - 30)
data = {
'client_id': _client_id,
'scope': 'https://www.googleapis.com/auth/youtube'
}
response = request._execute_request(
'https://oauth2.googleapis.com/device/code',
'POST',
headers={
'Content-Type': 'application/json'
},
data=data
)
response_data = json.loads(response.read())
verification_url = response_data['verification_url']
user_code = response_data['user_code']
print(f'Please open {verification_url} and input code {user_code}')
input('Press enter when you have completed this step.')
data = {
'client_id': _client_id,
'client_secret': _client_secret,
'device_code': response_data['device_code'],
'grant_type': 'urn:ietf:params:oauth:grant-type:device_code'
}
response = request._execute_request(
'https://oauth2.googleapis.com/token',
'POST',
headers={
'Content-Type': 'application/json'
},
data=data
)
response_data = json.loads(response.read())
self.access_token = response_data['access_token']
self.refresh_token = response_data['refresh_token']
self.expires = start_time + response_data['expires_in']
self.cache_tokens()
@property
def base_url(self):
"""Return the base url endpoint for the innertube API."""
return 'https://www.youtube.com/youtubei/v1'
@property
def base_data(self):
"""Return the base json data to transmit to the innertube API."""
return {
'context': self.context
}
@property
def base_params(self):
"""Return the base query parameters to transmit to the innertube API."""
return {
'key': self.api_key,
'contentCheckOk': True,
'racyCheckOk': True
}
def _call_api(self, endpoint, query, data):
"""Make a request to a given endpoint with the provided query parameters and data."""
# Remove the API key if oauth is being used.
if self.use_oauth:
del query['key']
endpoint_url = f'{endpoint}?{parse.urlencode(query)}'
headers = {
'Content-Type': 'application/json',
}
# Add the bearer token if applicable
if self.use_oauth:
if self.access_token:
self.refresh_bearer_token()
headers['Authorization'] = f'Bearer {self.access_token}'
else:
self.fetch_bearer_token()
headers['Authorization'] = f'Bearer {self.access_token}'
response = request._execute_request(
endpoint_url,
'POST',
headers=headers,
data=data
)
return json.loads(response.read())
def browse(self):
"""Make a request to the browse endpoint.
TODO: Figure out how we can use this
"""
# endpoint = f'{self.base_url}/browse' # noqa:E800
...
# return self._call_api(endpoint, query, self.base_data) # noqa:E800
def config(self):
"""Make a request to the config endpoint.
TODO: Figure out how we can use this
"""
# endpoint = f'{self.base_url}/config' # noqa:E800
...
# return self._call_api(endpoint, query, self.base_data) # noqa:E800
def guide(self):
"""Make a request to the guide endpoint.
TODO: Figure out how we can use this
"""
# endpoint = f'{self.base_url}/guide' # noqa:E800
...
# return self._call_api(endpoint, query, self.base_data) # noqa:E800
def next(self):
"""Make a request to the next endpoint.
TODO: Figure out how we can use this
"""
# endpoint = f'{self.base_url}/next' # noqa:E800
...
# return self._call_api(endpoint, query, self.base_data) # noqa:E800
def player(self, video_id):
"""Make a request to the player endpoint.
:param str video_id:
The video id to get player info for.
:rtype: dict
:returns:
Raw player info results.
"""
endpoint = f'{self.base_url}/player'
query = {
'videoId': video_id,
}
query.update(self.base_params)
return self._call_api(endpoint, query, self.base_data)
def search(self, search_query, continuation=None):
"""Make a request to the search endpoint.
:param str search_query:
The query to search.
:rtype: dict
:returns:
Raw search query results.
"""
endpoint = f'{self.base_url}/search'
query = {
'query': search_query
}
query.update(self.base_params)
data = {}
if continuation:
data['continuation'] = continuation
data.update(self.base_data)
return self._call_api(endpoint, query, data)
def verify_age(self, video_id):
"""Make a request to the age_verify endpoint.
Notable examples of the types of video this verification step is for:
* https://www.youtube.com/watch?v=QLdAhwSBZ3w
* https://www.youtube.com/watch?v=hc0ZDaAZQT0
:param str video_id:
The video id to get player info for.
:rtype: dict
:returns:
Returns information that includes a URL for bypassing certain restrictions.
"""
endpoint = f'{self.base_url}/verify_age'
data = {
'nextEndpoint': {
'urlEndpoint': {
'url': f'/watch?v={video_id}'
}
},
'setControvercy': True
}
data.update(self.base_data)
result = self._call_api(endpoint, self.base_params, data)
return result
def get_transcript(self, video_id):
"""Make a request to the get_transcript endpoint.
This is likely related to captioning for videos, but is currently untested.
"""
endpoint = f'{self.base_url}/get_transcript'
query = {
'videoId': video_id,
}
query.update(self.base_params)
result = self._call_api(endpoint, query, self.base_data)
return result