tomaseo2022 committed on
Commit
9c153d7
·
1 Parent(s): 58607b6

Upload gtoken.py

Browse files
Files changed (1) hide show
  1. gtoken.py +196 -0
gtoken.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ import ast
3
+ import math
4
+ import re
5
+ import time
6
+
7
+ import httpx
8
+
9
+ from googletrans.utils import rshift
10
+
11
+
12
class TokenAcquirer:
    """Google Translate API token generator.

    translate.google.com uses a token to authorize requests. If you are
    not Google, you do not have this token and will have to pay for use.
    This class is the result of reverse engineering the obfuscated and
    minified code used by Google to generate such a token.

    The token is based on a seed which is updated once per hour and on the
    text that will be translated.
    Both are combined - by some strange math - in order to generate a final
    token (e.g. 744915.856682) which is used by the API to validate the
    request.

    This operation will cause an additional request to get an initial
    token from translate.google.com.

    Example usage:
        >>> from googletrans.gtoken import TokenAcquirer
        >>> acquirer = TokenAcquirer()
        >>> text = 'test'
        >>> tk = acquirer.do(text)
        >>> tk
        '950629.577246'
    """

    # Both patterns are currently identical; they are kept as two attributes
    # so existing references to either name keep working.
    RE_TKK = re.compile(r'tkk:\'(.+?)\'', re.DOTALL)
    RE_RAWTKK = re.compile(r'tkk:\'(.+?)\'', re.DOTALL)

    def __init__(self, tkk='0', client: 'httpx.Client' = None, host='translate.google.com'):
        """Create a token generator.

        :param tkk: initial seed as a string ``'<hour>.<salt>'``; ``'0'``
            means "no seed yet".
        :param client: HTTP client used to fetch the host page; a new
            ``httpx.Client`` is created when none (or a falsy one) is given.
        :param host: host name or full URL of the translate page. A bare host
            name is prefixed with ``https://``.
        """
        self.client = client or httpx.Client()
        self.tkk = tkk
        # Check for a real scheme prefix instead of the substring 'http', so
        # that host names merely containing "http" still get a scheme.
        has_scheme = host.lower().startswith(('http://', 'https://'))
        self.host = host if has_scheme else 'https://' + host

    def _update(self):
        """Refresh ``self.tkk`` from the host page unless it is still valid.

        The integer part of the seed is compared with the current number of
        hours since the epoch; when they match no request is made. Otherwise
        the host page is fetched and searched for a seed. If the page exposes
        no seed, the previous value of ``self.tkk`` is kept: ``acquire``
        already handles the "no seed" case.
        """
        # we don't need to update the base TKK value when it is still valid
        now = math.floor(int(time.time() * 1000) / 3600000.0)
        if self.tkk and int(self.tkk.split('.')[0]) == now:
            return

        page = self.client.get(self.host).text

        raw_tkk = self.RE_RAWTKK.search(page)
        if raw_tkk:
            self.tkk = raw_tkk.group(1)
            return

        legacy = self.RE_TKK.search(page)
        if legacy is None:
            # Nothing recognisable in the page: keep the current seed rather
            # than failing with an AttributeError on a missing match.
            return

        # this will be the same as python code after stripping out a reserved word 'var'
        code = legacy.group(1).replace('var ', '')
        # unescape special ascii characters such as \x3d (=)
        code = code.encode().decode('unicode-escape')

        if code:
            self.tkk = self._parse_legacy_tkk(code)

    @staticmethod
    def _numeric_literal(node):
        """Return the number held by a numeric literal AST node, else ``None``."""
        if isinstance(node, ast.Constant):
            value = node.value
        else:
            # Older Python versions parse numbers into ast.Num, which exposes ``n``.
            value = getattr(node, 'n', None)
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return None
        return value

    @classmethod
    def _parse_legacy_tkk(cls, code):
        """Compute ``'<n>.<a op b>'`` from the legacy seed snippet.

        ``code`` is the JavaScript snippet with ``var `` removed, e.g.
        ``a=1;b=-2;return 414629+'.'+(a+b)``. The assignments to ``a`` and
        ``b`` are collected, the last numeric literal met after the ``return``
        becomes ``n``, and ``a`` and ``b`` are combined with the last
        non-``+`` operator met afterwards (``+`` by default).

        The combination is done with functions from ``operator`` on the
        parsed numbers; nothing from the page is evaluated as code.
        """
        import operator  # local import: not part of the module's import header

        binary_ops = {
            ast.Sub: operator.sub,
            ast.Mult: operator.mul,
            ast.Pow: operator.pow,
            ast.BitXor: operator.xor,
        }
        # the default operator is '+'
        combine = operator.add
        seen_return = False
        n, keys = 0, {'a': 0, 'b': 0}
        for node in ast.walk(ast.parse(code)):
            if isinstance(node, ast.Assign):
                name = getattr(node.targets[0], 'id', None)
                if name in keys:
                    literal = cls._numeric_literal(node.value)
                    if literal is not None:
                        keys[name] = literal
                    # the value can sometimes be negative
                    elif (isinstance(node.value, ast.UnaryOp)
                          and isinstance(node.value.op, ast.USub)):
                        operand = cls._numeric_literal(node.value.operand)
                        if operand is not None:
                            keys[name] = -operand
            elif isinstance(node, ast.Return):
                # parameters should be set after this point
                seen_return = True
            elif seen_return:
                literal = cls._numeric_literal(node)
                if literal is not None:
                    n = literal
                elif n > 0:
                    combine = binary_ops.get(type(node), combine)
        return '{}.{}'.format(n, combine(keys['a'], keys['b']))

    def _lazy(self, value):
        """Return a function that returns ``value``, like lazy evaluation.

        We won't be needing this because this seems to have been built for
        code obfuscation.

        the original code of this method is as follows:

        ... code-block: javascript

            var ek = function(a) {
                return function() {
                    return a;
                };
            }
        """
        return lambda: value

    def _xr(self, a, b):
        """Apply the mixing steps described by ``b`` to the integer ``a``.

        ``b`` is read three characters at a time:

        * 1st char: ``'+'`` adds the shifted value (result masked to 32
          bits), anything else XORs it;
        * 2nd char: ``'+'`` shifts right via ``rshift``, anything else shifts
          left;
        * 3rd char: shift amount, a decimal digit or a letter (``'a'`` = 10).
        """
        pos = 0
        last = len(b) - 2
        while pos < last:
            amount = b[pos + 2]
            amount = ord(amount[0]) - 87 if 'a' <= amount else int(amount)
            shifted = rshift(a, amount) if '+' == b[pos + 1] else a << amount
            a = a + shifted & 4294967295 if '+' == b[pos] else a ^ shifted

            pos += 3
        return a

    @staticmethod
    def _utf8_bytes(text):
        """Return the UTF-8 byte values of ``text`` as a list of ints.

        The text is first expanded to UTF-16 code units (as the original
        JavaScript sees it) and then encoded; a high surrogate directly
        followed by a low surrogate is encoded as one 4-byte sequence, any
        other surrogate is encoded on its own as 3 bytes.
        """
        units = []
        for char in text:
            point = ord(char)
            if point < 0x10000:
                units.append(point)
            else:
                # Python doesn't natively use Unicode surrogates, so account for those
                point -= 0x10000
                units.append(0xD800 + (point >> 10))
                units.append(0xDC00 + (point & 0x3FF))

        encoded = []
        index, count = 0, len(units)
        while index < count:
            unit = units[index]
            if unit < 0x80:
                # plain ASCII: one byte
                encoded.append(unit)
            elif unit < 0x800:
                encoded.append(unit >> 6 | 0xC0)
                encoded.append(unit & 0x3F | 0x80)
            else:
                if ((unit & 0xFC00) == 0xD800 and index + 1 < count
                        and (units[index + 1] & 0xFC00) == 0xDC00):
                    # surrogate pair: rebuild the code point, consume both units
                    index += 1
                    unit = 0x10000 + ((unit & 0x3FF) << 10) + (units[index] & 0x3FF)
                    encoded.append(unit >> 18 | 0xF0)
                    encoded.append(unit >> 12 & 0x3F | 0x80)
                else:
                    encoded.append(unit >> 12 | 0xE0)
                encoded.append(unit >> 6 & 0x3F | 0x80)
                encoded.append(unit & 0x3F | 0x80)
            index += 1
        return encoded

    def acquire(self, text):
        """Compute the request token for ``text`` from the current seed.

        :param text: the text that will be translated.
        :return: the token as a string ``'<x>.<y>'``.
        """
        # A seed without a '.' (including the default '0' or a falsy value)
        # counts as "no seed": both halves are then 0.
        parts = (self.tkk or '').split('.')
        has_seed = len(parts) > 1
        seed = int(parts[0]) if has_seed else 0
        salt = int(parts[1]) if has_seed else 0

        acc = seed
        for byte in self._utf8_bytes(text):
            acc += byte
            acc = self._xr(acc, '+-a^+6')
        acc = self._xr(acc, '+-3^+b+-f')
        acc ^= salt
        if acc < 0:  # pragma: nocover
            acc = (acc & 2147483647) + 2147483648
        acc %= 1000000  # int(1E6)

        return '{}.{}'.format(acc, acc ^ seed)

    def do(self, text):
        """Refresh the seed if needed and return the token for ``text``."""
        self._update()
        tk = self.acquire(text)
        return tk