Spaces:
Runtime error
Runtime error
Commit
·
9c153d7
1
Parent(s):
58607b6
Upload gtoken.py
Browse files
gtoken.py
ADDED
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
import ast
|
3 |
+
import math
|
4 |
+
import re
|
5 |
+
import time
|
6 |
+
|
7 |
+
import httpx
|
8 |
+
|
9 |
+
from googletrans.utils import rshift
|
10 |
+
|
11 |
+
|
12 |
+
class TokenAcquirer:
    """Google Translate API token generator

    translate.google.com uses a token to authorize the requests. If you are
    not Google, you do not have this token and will have to pay for use.
    This class is the result of reverse engineering on the obfuscated and
    minified code used by Google to generate such token.

    The token is based on a seed which is updated once per hour and on the
    text that will be translated.
    Both are combined - by some strange math - in order to generate a final
    token (e.g. 744915.856682) which is used by the API to validate the
    request.

    This operation will cause an additional request to get an initial
    token from translate.google.com.

    Example usage:
        >>> from googletrans.gtoken import TokenAcquirer
        >>> acquirer = TokenAcquirer()
        >>> text = 'test'
        >>> tk = acquirer.do(text)
        >>> tk
        '950629.577246'
    """

    # Pattern that captures the quoted ``tkk`` seed embedded in the host page.
    RE_TKK = re.compile(r'tkk:\'(.+?)\'', re.DOTALL)
    # Identical to RE_TKK and not used by this class; kept because it is a
    # public class attribute.
    RE_RAWTKK = re.compile(r'tkk:\'(.+?)\'', re.DOTALL)

    def __init__(self, tkk='0', client: 'httpx.Client' = None, host='translate.google.com'):
        """Store the seed, the HTTP client and the normalized host URL.

        :param tkk: initial seed; ``'0'`` means "no seed fetched yet".
        :param client: object whose ``get(url)`` result exposes ``.text``;
            a new ``httpx.Client`` is created when omitted.
        :param host: host name or full URL of the page carrying the seed.
        """
        self.client = client or httpx.Client()
        self.tkk = tkk
        # Only an explicit scheme prefix counts as "already a URL"; a plain
        # substring test would leave hosts such as 'httpbin.org' without one.
        if host.startswith(('http://', 'https://')):
            self.host = host
        else:
            self.host = 'https://' + host

    def _update(self):
        """Refresh ``self.tkk`` from the host page when it is out of date.

        Nothing is requested when the integer part of the current seed
        equals the number of whole hours since the epoch.

        :raises AttributeError: when the fetched page contains no ``tkk``
            value. The type matches what this failure has always raised, so
            existing handlers keep working; only the message is new.
        """
        # we don't need to update the base TKK value when it is still valid
        now = math.floor(int(time.time() * 1000) / 3600000.0)
        if self.tkk and int(self.tkk.split('.')[0]) == now:
            return

        r = self.client.get(self.host)

        raw_tkk = self.RE_TKK.search(r.text)
        if raw_tkk is None:
            # Searching the same text with the same pattern a second time can
            # never succeed, so fail here with an actionable message instead
            # of dereferencing None.
            raise AttributeError(
                'Could not find TKK token in the response from {}'.format(
                    self.host))

        self.tkk = raw_tkk.group(1)

    def _lazy(self, value):
        """like lazy evaluation, this method returns a lambda function that
        returns value given.
        We won't be needing this because this seems to have been built for
        code obfuscation.

        the original code of this method is as follows:

        .. code-block:: javascript

            var ek = function(a) {
                return function() {
                    return a;
                };
            }
        """
        return lambda: value

    def _xr(self, a, b):
        """Apply the shift/combine program ``b`` to the integer ``a``.

        ``b`` is read in groups of three characters; trailing characters
        that do not fill a group are ignored:

        1. combine operator: ``'+'`` adds (truncated to 32 bits), anything
           else XORs;
        2. shift direction: ``'+'`` calls ``rshift``, anything else shifts
           left;
        3. shift amount: a decimal digit, or a letter where ``'a'`` is 10.

        :return: the transformed integer.
        """
        for start in range(0, len(b) - 2, 3):
            combine = b[start]
            direction = b[start + 1]
            amount = b[start + 2]

            shift = ord(amount[0]) - 87 if 'a' <= amount else int(amount)
            shifted = rshift(a, shift) if '+' == direction else a << shift
            if '+' == combine:
                a = (a + shifted) & 4294967295
            else:
                a = a ^ shifted
        return a

    def acquire(self, text):
        """Compute the request token for ``text`` from the current seed.

        :param text: the string that is going to be translated.
        :return: the token as a string of the form ``'<a>.<b>'``.
        """
        # Convert text to ints (UTF-16 code units)
        a = []
        for char in text:
            val = ord(char)
            if val < 0x10000:
                a.append(val)
            else:
                # Python doesn't natively use Unicode surrogates, so account
                # for those by splitting into a high/low surrogate pair
                offset = val - 0x10000
                a.append(offset // 0x400 + 0xD800)
                a.append(offset % 0x400 + 0xDC00)

        # '0' is the "no seed" placeholder; a seed without a '.' is treated
        # the same way (both halves become 0).
        seed = self.tkk if self.tkk != '0' else ''
        d = seed.split('.')
        b = int(d[0]) if len(d) > 1 else 0

        # e is the char code array: the UTF-8 bytes of the code units in a
        e = []
        g = 0
        size = len(a)
        while g < size:
            l = a[g]
            if l < 128:
                # single byte (ascii)
                e.append(l)
            else:
                if l < 2048:
                    # lead byte of a two-byte sequence
                    e.append(l >> 6 | 192)
                else:
                    # a high surrogate followed by a low surrogate is merged
                    # back into one code point -> four-byte sequence
                    if (l & 64512) == 55296 and g + 1 < size and \
                            (a[g + 1] & 64512) == 56320:
                        g += 1
                        l = 65536 + ((l & 1023) << 10) + (a[g] & 1023)
                        e.append(l >> 18 | 240)
                        e.append(l >> 12 & 63 | 128)
                    else:
                        # lead byte of a three-byte sequence
                        e.append(l >> 12 | 224)
                    e.append(l >> 6 & 63 | 128)
                # final continuation byte shared by all multi-byte forms
                e.append(l & 63 | 128)
            g += 1

        # Fold every byte into the accumulator, starting from the seed's
        # integer part.
        a = b
        for value in e:
            a += value
            a = self._xr(a, '+-a^+6')
        a = self._xr(a, '+-3^+b+-f')
        a ^= int(d[1]) if len(d) > 1 else 0
        if a < 0:  # pragma: nocover
            a = (a & 2147483647) + 2147483648
        a %= 1000000  # int(1E6)

        return '{}.{}'.format(a, a ^ b)

    def do(self, text):
        """Refresh the seed if needed and return the token for ``text``."""
        self._update()
        tk = self.acquire(text)
        return tk
|