Spaces:

amra-ai
/

studies

Runtime error

App Files Files Community

Roland Ding committed on Sep 12, 2023

Commit

b3ed092

•

1 Parent(s): 13543e6

8.8.21.59 excluded terminal display for all data transformation functions.

Browse files

Files changed (1) hide show

utility.py +25 -10

utility.py CHANGED Viewed

@@ -1,11 +1,14 @@
 import json
 import regex as re
 from application import *
 from pdfminer.high_level import extract_text
 from pdfminer.pdfparser import PDFParser
 from pdfminer.pdfdocument import PDFDocument
 '''
 universal system functions
 '''
@@ -205,13 +208,13 @@ def replace_symbols(s):
     s = s.replace(";","")
     s = s.replace("'","")
     s = s.replace('"',"")
-    return remove_symbols(s)
 '''
 following functions are for dynamodb data manipulation
 '''
-@terminal_print
 def db_map_to_py_dict(db_map):
     '''
     this function converts a DynamoDB map data structure to a Python dictionary
@@ -248,7 +251,7 @@ def db_map_to_py_dict(db_map):
     return py_dict
-@terminal_print
 def py_dict_to_db_map(py_dict):
     '''
     this function converts a Python dictionary to a DynamoDB map data structure
@@ -269,20 +272,20 @@ def py_dict_to_db_map(py_dict):
         if type(value) is str:
             db_map[key] = {"S":value}
         elif type(value) is int or type(value) is float:
-            db_map[key] = {"N":value}
         elif type(value) is dict:
             db_map[key] = {"M":py_dict_to_db_map(value)}
         elif type(value) is list:
             db_map[key] = {"L":py_list_to_db_list(value)}
         elif type(value) is bytes:
-            db_map[key] = {"BS":value}
         elif type(value) is bool:
             db_map[key] = {"BOOL":value}
         elif value is None:
             db_map[key] = {"NULL":True}
     return db_map
-@terminal_print
 def db_list_to_py_list(db_list):
     '''
     this function converts a DynamoDB list data structure to a Python list
@@ -304,14 +307,25 @@ def db_list_to_py_list(db_list):
                 py_list.append(db_map_to_py_dict(v))
             elif t == "L":
                 py_list.append(db_list_to_py_list(v))
-            elif t =="N" or t =="S" or t =="B" or t =="BOOL" or t =="NULL" or t =="SS" or t =="NS" or t =="BS":
                 py_list.append(v)
             else:
                 py_list.append(db_map_to_py_dict(v))
     return py_list
-@terminal_print
 def py_list_to_db_list(py_list):
     '''
     this function converts a Python list to a DynamoDB list data structure
@@ -389,7 +403,7 @@ following functions are used for business logic. (to be moved to business logic
 '''
 @terminal_print
-def est_cost(n_tokens,rate):
     '''
     this function calculates the estimated cost of the translation
     please note that the rate is per 1000 tokens.
@@ -397,7 +411,7 @@ def est_cost(n_tokens,rate):
     Parameters
     ----------
-    n_tokens : int
         number of tokens in the text
     rate : float
         rate per 1000 tokens
@@ -406,4 +420,5 @@ def est_cost(n_tokens,rate):
     -------
     float
         estimated cost of the translation'''
     return round(rate*n_tokens/1000,4)

 import json
 import regex as re
+import tiktoken
 from application import *
 from pdfminer.high_level import extract_text
 from pdfminer.pdfparser import PDFParser
 from pdfminer.pdfdocument import PDFDocument
+encoding = tiktoken.get_encoding("cl100k_base")
 '''
 universal system functions
 '''
     s = s.replace(";","")
     s = s.replace("'","")
     s = s.replace('"',"")
+    return s
 '''
 following functions are for dynamodb data manipulation
 '''
+# @terminal_print
 def db_map_to_py_dict(db_map):
     '''
     this function converts a DynamoDB map data structure to a Python dictionary
     return py_dict
+# @terminal_print
 def py_dict_to_db_map(py_dict):
     '''
     this function converts a Python dictionary to a DynamoDB map data structure
         if type(value) is str:
             db_map[key] = {"S":value}
         elif type(value) is int or type(value) is float:
+            db_map[key] = {"N":str(value)}
         elif type(value) is dict:
             db_map[key] = {"M":py_dict_to_db_map(value)}
         elif type(value) is list:
             db_map[key] = {"L":py_list_to_db_list(value)}
         elif type(value) is bytes:
+            db_map[key] = {"B":value}
         elif type(value) is bool:
             db_map[key] = {"BOOL":value}
         elif value is None:
             db_map[key] = {"NULL":True}
     return db_map
+# @terminal_print
 def db_list_to_py_list(db_list):
     '''
     this function converts a DynamoDB list data structure to a Python list
                 py_list.append(db_map_to_py_dict(v))
             elif t == "L":
                 py_list.append(db_list_to_py_list(v))
+            elif t =="N":
+                if "." in v:
+                    py_list.append(float(v))
+                else:
+                    py_list.append(int(v))
+            elif t =="S" or t =="BOOL" or t =="SS" or t =="NS":
                 py_list.append(v)
+            elif t =="B" or t =="BS":
+                py_list.append(bytes(v,"utf-8"))
+            elif t =="NULL":
+                py_list.append(None)
+            elif t =="BOOL":
+                py_list.append(bool(v))
             else:
                 py_list.append(db_map_to_py_dict(v))
     return py_list
+# @terminal_print
 def py_list_to_db_list(py_list):
     '''
     this function converts a Python list to a DynamoDB list data structure
 '''
 @terminal_print
+def est_cost(text,rate):
     '''
     this function calculates the estimated cost of the translation
     please note that the rate is per 1000 tokens.
     Parameters
     ----------
+    text : str
         text whose tokens are counted to compute the estimate
     rate : float
         rate per 1000 tokens
     -------
     float
         estimated cost of the translation'''
+    n_tokens = len(encoding.encode(text))
     return round(rate*n_tokens/1000,4)