Roland Ding committed on
Commit
b3ed092
1 Parent(s): 13543e6

8.8.21.59 excluded terminal display for all data transformation functions.

Browse files
Files changed (1) hide show
  1. utility.py +25 -10
utility.py CHANGED
@@ -1,11 +1,14 @@
1
  import json
2
  import regex as re
 
3
 
4
  from application import *
5
  from pdfminer.high_level import extract_text
6
  from pdfminer.pdfparser import PDFParser
7
  from pdfminer.pdfdocument import PDFDocument
8
 
 
 
9
  '''
10
  universal system functions
11
  '''
@@ -205,13 +208,13 @@ def replace_symbols(s):
205
  s = s.replace(";","")
206
  s = s.replace("'","")
207
  s = s.replace('"',"")
208
- return remove_symbols(s)
209
 
210
  '''
211
  following functions are for dynamodb data manipulation
212
  '''
213
 
214
- @terminal_print
215
  def db_map_to_py_dict(db_map):
216
  '''
217
  this function convert dynamodb map data structure to python dictionary
@@ -248,7 +251,7 @@ def db_map_to_py_dict(db_map):
248
 
249
  return py_dict
250
 
251
- @terminal_print
252
  def py_dict_to_db_map(py_dict):
253
  '''
254
  this function convert python dictionary to dynamodb map data structure
@@ -269,20 +272,20 @@ def py_dict_to_db_map(py_dict):
269
  if type(value) is str:
270
  db_map[key] = {"S":value}
271
  elif type(value) is int or type(value) is float:
272
- db_map[key] = {"N":value}
273
  elif type(value) is dict:
274
  db_map[key] = {"M":py_dict_to_db_map(value)}
275
  elif type(value) is list:
276
  db_map[key] = {"L":py_list_to_db_list(value)}
277
  elif type(value) is bytes:
278
- db_map[key] = {"BS":value}
279
  elif type(value) is bool:
280
  db_map[key] = {"BOOL":value}
281
  elif value is None:
282
  db_map[key] = {"NULL":True}
283
  return db_map
284
 
285
- @terminal_print
286
  def db_list_to_py_list(db_list):
287
  '''
288
  this function convert dynamodb list data structure to python list
@@ -304,14 +307,25 @@ def db_list_to_py_list(db_list):
304
  py_list.append(db_map_to_py_dict(v))
305
  elif t == "L":
306
  py_list.append(db_list_to_py_list(v))
307
- elif t =="N" or t =="S" or t =="B" or t =="BOOL" or t =="NULL" or t =="SS" or t =="NS" or t =="BS":
 
 
 
 
 
308
  py_list.append(v)
 
 
 
 
 
 
309
  else:
310
  py_list.append(db_map_to_py_dict(v))
311
 
312
  return py_list
313
 
314
- @terminal_print
315
  def py_list_to_db_list(py_list):
316
  '''
317
  this function convert python list to dynamodb list data structure
@@ -389,7 +403,7 @@ following functions are used for business logic. (to be moved to business logic
389
  '''
390
 
391
  @terminal_print
392
- def est_cost(n_tokens,rate):
393
  '''
394
  this function calculate the estimated cost of the translation
395
  please note that the rate is per 1000 tokens.
@@ -397,7 +411,7 @@ def est_cost(n_tokens,rate):
397
 
398
  Parameters
399
  ----------
400
- n_tokens : int
401
  number of tokens in the text
402
  rate : float
403
  rate per 1000 tokens
@@ -406,4 +420,5 @@ def est_cost(n_tokens,rate):
406
  -------
407
  float
408
  estimated cost of the translation'''
 
409
  return round(rate*n_tokens/1000,4)
 
1
  import json
2
  import regex as re
3
+ import tiktoken
4
 
5
  from application import *
6
  from pdfminer.high_level import extract_text
7
  from pdfminer.pdfparser import PDFParser
8
  from pdfminer.pdfdocument import PDFDocument
9
 
10
+ encoding = tiktoken.get_encoding("cl100k_base")
11
+
12
  '''
13
  universal system functions
14
  '''
 
208
  s = s.replace(";","")
209
  s = s.replace("'","")
210
  s = s.replace('"',"")
211
+ return s
212
 
213
  '''
214
  following functions are for dynamodb data manipulation
215
  '''
216
 
217
+ # @terminal_print
218
  def db_map_to_py_dict(db_map):
219
  '''
220
  this function convert dynamodb map data structure to python dictionary
 
251
 
252
  return py_dict
253
 
254
+ # @terminal_print
255
  def py_dict_to_db_map(py_dict):
256
  '''
257
  this function convert python dictionary to dynamodb map data structure
 
272
  if type(value) is str:
273
  db_map[key] = {"S":value}
274
  elif type(value) is int or type(value) is float:
275
+ db_map[key] = {"N":str(value)}
276
  elif type(value) is dict:
277
  db_map[key] = {"M":py_dict_to_db_map(value)}
278
  elif type(value) is list:
279
  db_map[key] = {"L":py_list_to_db_list(value)}
280
  elif type(value) is bytes:
281
+ db_map[key] = {"B":value}
282
  elif type(value) is bool:
283
  db_map[key] = {"BOOL":value}
284
  elif value is None:
285
  db_map[key] = {"NULL":True}
286
  return db_map
287
 
288
+ # @terminal_print
289
  def db_list_to_py_list(db_list):
290
  '''
291
  this function convert dynamodb list data structure to python list
 
307
  py_list.append(db_map_to_py_dict(v))
308
  elif t == "L":
309
  py_list.append(db_list_to_py_list(v))
310
+ elif t =="N":
311
+ if "." in v:
312
+ py_list.append(float(v))
313
+ else:
314
+ py_list.append(int(v))
315
+ elif t =="S" or t =="BOOL" or t =="SS" or t =="NS":
316
  py_list.append(v)
317
+ elif t =="B" or t =="BS":
318
+ py_list.append(bytes(v,"utf-8"))
319
+ elif t =="NULL":
320
+ py_list.append(None)
321
+ elif t =="BOOL":
322
+ py_list.append(bool(v))
323
  else:
324
  py_list.append(db_map_to_py_dict(v))
325
 
326
  return py_list
327
 
328
+ # @terminal_print
329
  def py_list_to_db_list(py_list):
330
  '''
331
  this function convert python list to dynamodb list data structure
 
403
  '''
404
 
405
  @terminal_print
406
+ def est_cost(text,rate):
407
  '''
408
  this function calculate the estimated cost of the translation
409
  please note that the rate is per 1000 tokens.
 
411
 
412
  Parameters
413
  ----------
414
+ text : str
415
  text whose token count (cl100k_base encoding) is used for the estimate
416
  rate : float
417
  rate per 1000 tokens
 
420
  -------
421
  float
422
  estimated cost of the translation'''
423
+ n_tokens = len(encoding.encode(text))
424
  return round(rate*n_tokens/1000,4)