# coding:utf-8
# author LuShan
# version : 1.1.9
import json
import logging
import random
import re
from urllib.parse import quote

import requests
import urllib3

from trans_constant import LANGUAGES, DEFAULT_SERVICE_URLS

log = logging.getLogger(__name__)
log.addHandler(logging.NullHandler())

# Requests are sent with verify=False, so silence the resulting warnings.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Domain suffixes ('com', 'co.uk', ...) extracted from the known service URLs.
URLS_SUFFIX = [
    re.search(r'translate\.google\.(.*)', url.strip()).group(1)
    for url in DEFAULT_SERVICE_URLS
]
URL_SUFFIX_DEFAULT = 'com'

# Sentinel: an RPC payload did not have a shape we know how to read.
_NO_RESULT = object()


class google_new_transError(Exception):
    """Exception that uses context to present a meaningful error message.

    :param msg: Explicit message. When omitted, a message is inferred from
        the ``tts`` and ``response`` keyword arguments.
    :param tts: (keyword) The client object that issued the failed request.
    :param response: (keyword) The HTTP response, or ``None`` when the
        request never produced one.
    """

    def __init__(self, msg=None, **kwargs):
        self.tts = kwargs.pop('tts', None)
        self.rsp = kwargs.pop('response', None)
        if msg:
            self.msg = msg
        elif self.tts is not None:
            self.msg = self.infer_msg(self.tts, self.rsp)
        else:
            self.msg = None
        super().__init__(self.msg)

    def infer_msg(self, tts, rsp=None):
        """Build a "<premise>. Probable cause: <cause>" message.

        :param tts: The client object that issued the request.
        :param rsp: The HTTP response, or ``None`` if there was none.
        :return: The human-readable error message.
        """
        if rsp is None:
            return "{}. Probable cause: {}".format("Failed to connect", "timeout")

        cause = "Unknown"
        status = rsp.status_code
        reason = rsp.reason
        premise = "{:d} ({}) from TTS API".format(status, reason)

        # ``lang_check`` / ``lang`` are optional on the client object, so
        # read them defensively instead of raising AttributeError here.
        if status == 403:
            cause = "Bad token or upstream API changes"
        elif status == 200 and not getattr(tts, 'lang_check', True):
            cause = "No audio stream in response. Unsupported language '%s'" % getattr(tts, 'lang', None)
        elif status >= 500:
            cause = "Uptream API error. Try again later."

        return "{}. Probable cause: {}".format(premise, cause)


class google_translator:
    """Client for the Google Translate web "batchexecute" endpoint.

    Language codes accepted for source and target are listed in `LANGUAGES`
    (for example 'en', 'zh', 'th').

    :param url_suffix: Domain suffix of the translate service to use
        (``translate.google.<url_suffix>``). The value should be one of the
        suffixes derived from `DEFAULT_SERVICE_URLS`; any other value falls
        back to ``'com'``.
    :type url_suffix: :class:`str`
    :param timeout: Timeout used for every request; passed through to
        ``requests``.
    :type timeout: number, or a pair of numbers
    :param proxies: Proxies used for every request. Anything that is not a
        dict is treated as "no proxies".
    :type proxies: dict, like
        ``{'http': 'http://171.112.169.47:19934/',
        'https': 'https://171.112.169.47:19934/'}``
    """

    # Identifier of the RPC; it also tags the response line with the result.
    _RPC_ID = "MkEWBc"

    def __init__(self, url_suffix="com", timeout=5, proxies=None):
        self.proxies = proxies
        if url_suffix not in URLS_SUFFIX:
            self.url_suffix = URL_SUFFIX_DEFAULT
        else:
            self.url_suffix = url_suffix
        url_base = "https://translate.google.{}".format(self.url_suffix)
        self.url = url_base + "/_/TranslateWebserverUi/data/batchexecute"
        self.timeout = timeout

    def _package_rpc(self, text, lang_src='auto', lang_tgt='auto'):
        """Return the URL-encoded ``f.req=...&`` request body for *text*."""
        GOOGLE_TTS_RPC = [self._RPC_ID]
        parameter = [[text.strip(), lang_src, lang_tgt, True], [1]]
        escaped_parameter = json.dumps(parameter, separators=(',', ':'))
        rpc = [[[random.choice(GOOGLE_TTS_RPC), escaped_parameter, None, "generic"]]]
        escaped_rpc = json.dumps(rpc, separators=(',', ':'))
        return "f.req={}&".format(quote(escaped_rpc))

    @staticmethod
    def _normalize_lang(code):
        """Return *code* if it is a known language code, else ``'auto'``.

        The lookup also accepts codes that match `LANGUAGES` ignoring case;
        the caller's spelling is then kept unchanged.
        """
        try:
            if code in LANGUAGES:
                return code
            if isinstance(code, str) and code.lower() in LANGUAGES:
                return code
        except TypeError:
            # Unhashable value: treat it like any other unknown code.
            pass
        return 'auto'

    def _headers(self):
        """Return the HTTP headers sent with every request."""
        return {
            "Referer": "http://translate.google.{}/".format(self.url_suffix),
            "User-Agent":
                "Mozilla/5.0 (Windows NT 10.0; WOW64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/47.0.2526.106 Safari/537.36",
            "Content-Type": "application/x-www-form-urlencoded;charset=utf-8"
        }

    def _send(self, freq):
        """POST the packaged RPC body *freq* and return the response."""
        request = requests.Request(method='POST',
                                   url=self.url,
                                   data=freq,
                                   headers=self._headers())
        if not isinstance(self.proxies, dict):
            self.proxies = {}
        with requests.Session() as session:
            session.proxies = self.proxies
            return session.send(request=request.prepare(),
                                verify=False,
                                timeout=self.timeout)

    @staticmethod
    def _decode_rpc_line(decoded_line):
        """Decode one tagged response line into the inner RPC payload.

        The line is parsed as-is first. If that fails, the form with an
        appended ``]`` (which this module used previously) is tried before
        giving up with the original parse error.
        """
        try:
            envelope = json.loads(decoded_line)
        except ValueError as err:
            try:
                envelope = json.loads(decoded_line + ']')
            except ValueError:
                raise err
        # The payload itself is a JSON document embedded as a string.
        return json.loads(envelope[0][2])

    @staticmethod
    def _extract_translation(payload, pronounce):
        """Pull the translation out of a decoded RPC payload.

        :return: The value ``translate`` should return, or ``_NO_RESULT``
            when the payload has neither one nor two candidates.
        """
        candidates = payload[1][0]
        if len(candidates) == 1:
            first = candidates[0]
            if len(first) <= 5:
                # only url
                raw = first[0]
                return [raw, None, None] if pronounce else raw
            # Each sentence is followed by a single space (including the last).
            translated = ''.join(part[0].strip() + ' ' for part in first[5])
            if pronounce:
                return [translated, payload[0][0], payload[1][0][0][1]]
            return translated
        if len(candidates) == 2:
            variants = [candidate[0] for candidate in candidates]
            if pronounce:
                return [variants, payload[0][0], payload[1][0][0][1]]
            return variants
        return _NO_RESULT

    def translate(self, text, lang_tgt='auto', lang_src='auto', pronounce=False):
        """Translate *text*.

        :param text: The source text to be translated (converted with
            ``str``).
        :param lang_tgt: The language to translate the source text into.
            Should be one of the codes in `LANGUAGES`; unknown codes are
            replaced by ``'auto'``.
        :param lang_src: The language of the source text. Should be one of
            the codes in `LANGUAGES`; unknown codes are replaced by
            ``'auto'``.
        :param pronounce: When true, return
            ``[translation, pronounce_src, pronounce_tgt]`` instead of only
            the translation.
        :return: The translated string, or a list of two strings when the
            service returns two candidates; wrapped in a three-item list
            when *pronounce* is true. ``""`` for empty text, a warning
            string for text of 5000 characters or more, and ``None`` when
            no result line is found in a successful response.
        :raises requests.exceptions.ConnectTimeout: on connection timeout.
        :raises google_new_transError: on any other request failure or on
            an HTTP error status.
        """
        # An unknown code only resets its own parameter, never the other one.
        lang_src = self._normalize_lang(lang_src)
        lang_tgt = self._normalize_lang(lang_tgt)

        text = str(text)
        if len(text) >= 5000:
            return "Warning: Can only detect less than 5000 characters"
        if len(text) == 0:
            return ""

        freq = self._package_rpc(text, lang_src, lang_tgt)
        response = None
        try:
            response = self._send(freq)
            for line in response.iter_lines(chunk_size=1024):
                decoded_line = line.decode('utf-8')
                if self._RPC_ID not in decoded_line:
                    continue
                payload = self._decode_rpc_line(decoded_line)
                result = self._extract_translation(payload, pronounce)
                if result is not _NO_RESULT:
                    return result
            response.raise_for_status()
        except requests.exceptions.ConnectTimeout:
            raise
        except requests.exceptions.HTTPError as err:
            # Request successful, bad response
            raise google_new_transError(tts=self, response=response) from err
        except requests.exceptions.RequestException as err:
            # Request failed
            raise google_new_transError(tts=self) from err

    def detect(self, text):
        """Detect the language of *text*.

        :param text: The text to inspect (converted with ``str``).
        :return: ``[code, name]`` where *name* is ``LANGUAGES[code.lower()]``.
            ``""`` for empty text, and ``None`` for text of 5000 characters
            or more or when no result line is found in a successful
            response.
        :raises google_new_transError: on a request failure or an HTTP
            error status.
        """
        text = str(text)
        if len(text) >= 5000:
            log.debug("Warning: Can only detect less than 5000 characters")
            return None
        if len(text) == 0:
            return ""

        freq = self._package_rpc(text)
        response = None
        try:
            response = self._send(freq)
            for line in response.iter_lines(chunk_size=1024):
                decoded_line = line.decode('utf-8')
                if self._RPC_ID not in decoded_line:
                    continue
                payload = self._decode_rpc_line(decoded_line)
                detect_lang = payload[0][2]
                return [detect_lang, LANGUAGES[detect_lang.lower()]]
            response.raise_for_status()
        except requests.exceptions.HTTPError as err:
            # Request successful, bad response
            log.debug(str(err))
            raise google_new_transError(tts=self, response=response) from err
        except requests.exceptions.RequestException as err:
            # Request failed
            log.debug(str(err))
            raise google_new_transError(tts=self) from err