TheAnsIs42 commited on
Commit
fa3faa9
·
1 Parent(s): 04b904f

add dict utility, functions not pruned

Browse files

Former-commit-id: cce40bebec78c4bd8ab0ae7fa3adf76edfa54f2b

Files changed (1) hide show
  1. dict_util.py +46 -0
dict_util.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import csv
2
+ import pickle
3
+
4
+ ### NOTICE: the CSV input must have exactly two columns (key, value); the same key may appear on multiple rows.
5
+
6
+
7
+ # 1_2_3, 1 is action, 2 is supply object, 3 is accept object
8
def update_dict_csv(term_dict, f):
    """Merge two-column CSV rows from *f* into *term_dict* in place.

    Each row is read as ``key, value``. The value is added to the list
    stored under the key unless it is already present, so feeding the same
    CSV twice does not create duplicates. Columns beyond the second are
    ignored.

    Args:
        term_dict: Dict mapping a key string to a list of value strings.
            It is mutated in place.
        f: Any iterable of CSV lines accepted by ``csv.reader`` (for
            example an open text file).

    Returns:
        None.
    """
    for row in csv.reader(f):
        # csv.reader yields [] for blank lines, and a one-column row has no
        # value to store; skip both instead of raising IndexError.
        if len(row) < 2:
            continue
        key, value = row[0], row[1]
        values = term_dict.setdefault(key, [])
        if value not in values:
            values.append(value)
16
+
17
def export_dict_csv(term_dict, f):
    """Write *term_dict* to *f* as two-column ``key,value`` CSV rows.

    A key whose value is a list or tuple produces one row per element, so
    the output uses the same "two columns, key may repeat" layout that
    ``csv.reader``-based loading of this dictionary expects. Any other
    value is written as a single ``key,value`` row. A key with an empty
    list produces no rows.

    Args:
        term_dict: Dict mapping keys to a list of values (or a single
            value).
        f: Writable text file object; open it with ``newline=''`` so the
            csv module controls line endings.

    Returns:
        None.
    """
    # Build the writer once instead of once per row.
    writer = csv.writer(f)
    for key, val in term_dict.items():
        if isinstance(val, (list, tuple)):
            # Writing the list itself would store its Python repr
            # ("['x', 'y']") in a single cell, which cannot be read back.
            for item in val:
                writer.writerow([key, item])
        else:
            writer.writerow([key, val])
21
+
22
def save_dict_pickle(term_dict, f):
    """Pickle *term_dict* into the binary file object *f*.

    The highest pickle protocol available to the running interpreter is
    used.

    Args:
        term_dict: Object to serialize (the term dictionary).
        f: Writable binary file object.
    """
    pickler = pickle.Pickler(f, pickle.HIGHEST_PROTOCOL)
    pickler.dump(term_dict)
24
+
25
def update_csv_pickle(pickle_f, csv_f):
    """Merge two-column CSV rows into a pickled term dictionary, in place.

    The dictionary is loaded from *pickle_f*, each ``key, value`` row of
    *csv_f* is merged into it (values already present under a key are not
    duplicated), and the result is written back to *pickle_f*, replacing
    the previous contents.

    Args:
        pickle_f: Binary file object opened for both reading and writing
            (for example mode ``'r+b'``); it must be seekable.
        csv_f: Any iterable of CSV lines accepted by ``csv.reader``.

    Returns:
        None.
    """
    # NOTE(security): pickle.load can execute arbitrary code; only use this
    # on pickle files that come from a trusted source.
    term_dict = pickle.load(pickle_f)
    for row in csv.reader(csv_f):
        # csv.reader yields [] for blank lines, and a one-column row has no
        # value to store; skip both instead of raising IndexError.
        if len(row) < 2:
            continue
        key, value = row[0], row[1]
        values = term_dict.setdefault(key, [])
        if value not in values:
            values.append(value)
    # After load() the file position sits at the end of the old pickle.
    # Rewind and truncate so the new pickle replaces the old one; otherwise
    # it would be appended and later loads would still return stale data.
    pickle_f.seek(0)
    pickle_f.truncate()
    # Save to the pickle file with the highest protocol available.
    pickle.dump(term_dict, pickle_f, pickle.HIGHEST_PROTOCOL)
35
+
36
+
37
# Demo: load the term CSV into a dictionary, then export it again.
term_dict_sc2 = {}
# newline='' is what the csv module expects for files it reads or writes:
# it keeps embedded newlines intact and prevents blank lines between rows on
# platforms that translate '\n' to '\r\n'.
with open("./finetune_data/dict_enzh.csv", 'r', encoding='utf-8', newline='') as f:
    update_dict_csv(term_dict_sc2, f)

with open("../test.csv", "w", encoding='utf-8', newline='') as w:
    export_dict_csv(term_dict_sc2, w)
44
+
45
+ ## To load a saved pickle file, just call:
46
+ # pickle.load(f)