from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, AutoConfig, AutoModelForSequenceClassification, TrainingArguments
from shared import CustomTokens, GeneralArguments
from dataclasses import dataclass, field
from typing import Optional, Union
import torch
import classify
import base64
import re
import requests
import json
import logging

# Apply logging's default configuration at import time (adds a default
# handler to the root logger if it has none), then create this module's logger.
logging.basicConfig()
logger = logging.getLogger(__name__)

# Public innertube key. It is kept base64-encoded in the source so that it is
# not incorrectly flagged (presumably by automated secret scanners), and is
# decoded to a str once, when the module is imported.
INNERTUBE_KEY = base64.b64decode(
    b'QUl6YVN5QU9fRkoyU2xxVThRNFNURUhMR0NpbHdfWTlfMTFxY1c4').decode()

# Description of a desktop "WEB" client: user agent, client name and version.
# NOTE(review): presumably sent as the `context` payload of innertube
# requests together with INNERTUBE_KEY -- confirm against the call sites.
YT_CONTEXT = {
    'client': {
        'userAgent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36,gzip(gfe)',
        'clientName': 'WEB',
        'clientVersion': '2.20211221.00.00',
    }
}

# Pattern for an assignment to `ytInitialData` (either the bare name or
# `window["ytInitialData"]`); group 1 lazily captures the `{...}` object that
# is assigned. The literal continues beyond this line.
_YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;\s*(?:var\s+meta|