Spaces:
Sleeping
Sleeping
TheAnsIs42
commited on
Commit
·
fa3faa9
1
Parent(s):
04b904f
add dict utility, functions not pruned
Browse filesFormer-commit-id: cce40bebec78c4bd8ab0ae7fa3adf76edfa54f2b
- dict_util.py +46 -0
dict_util.py
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import csv
|
2 |
+
import pickle
|
3 |
+
|
4 |
+
### NOTICE: CSV input must have two columns per row (key, value); the same key may appear on multiple rows and several CSV inputs may be merged into one dict.
|
5 |
+
|
6 |
+
|
7 |
+
# 1_2_3, 1 is action, 2 is supply object, 3 is accept object
|
8 |
+
def update_dict_csv(term_dict, f):
    """Merge two-column CSV rows read from *f* into *term_dict* in place.

    Each row is read as ``key, value``. The value is added to the list stored
    under the key unless it is already present; a key not yet in the dict gets
    a new single-element list. Columns beyond the second are ignored and
    blank lines are skipped.

    Args:
        term_dict: Dict mapping a key to a list of values; updated in place.
        f: Iterable of CSV lines, e.g. a file opened in text mode.

    Raises:
        IndexError: If a non-empty row has fewer than two columns.
    """
    for row in csv.reader(f):
        # csv.reader yields [] for blank lines (typical for a trailing newline
        # or a file written without newline=''); skip them instead of failing
        # on row[0].
        if not row:
            continue
        key, value = row[0], row[1]
        if key not in term_dict:
            term_dict[key] = [value]
        elif value not in term_dict[key]:
            # Rebind to a new list instead of appending, so a list object that
            # is also referenced elsewhere is never mutated.
            term_dict[key] = term_dict[key] + [value]
|
16 |
+
|
17 |
+
def export_dict_csv(term_dict, f):
    """Write *term_dict* to *f* as two-column CSV, one row per (key, value).

    A key whose value is a list or tuple produces one ``key,value`` row for
    each element, which is the same two-column layout this module reads back
    when building a dictionary from CSV. Any other value is written as a
    single ``key,value`` row. A key with an empty list produces no rows.

    Args:
        term_dict: Dict mapping a key to a list of values.
        f: Writable text file object; open it with ``newline=''`` as the csv
            module recommends.
    """
    # One writer for the whole export instead of a new one per row.
    writer = csv.writer(f)
    for key, val in term_dict.items():
        # Writing the list itself would emit its Python repr ("['a', 'b']") in
        # a single cell, which cannot be parsed back into separate values.
        values = val if isinstance(val, (list, tuple)) else [val]
        writer.writerows([key, item] for item in values)
|
21 |
+
|
22 |
+
def save_dict_pickle(term_dict, f):
    """Pickle *term_dict* into the binary file object *f*.

    The highest protocol supported by the running interpreter is used.

    Args:
        term_dict: Object to serialize.
        f: File object opened for binary writing.
    """
    pickler = pickle.Pickler(f, pickle.HIGHEST_PROTOCOL)
    pickler.dump(term_dict)
|
24 |
+
|
25 |
+
def update_csv_pickle(pickle_f, csv_f):
    """Merge two-column CSV rows into a pickled dictionary, rewriting it in place.

    The dictionary is loaded from *pickle_f*, each ``key, value`` row of
    *csv_f* is merged into it (values already stored under a key are not
    duplicated; blank lines are skipped), and the result is written back to
    the start of *pickle_f*, replacing the previous content.

    Args:
        pickle_f: Seekable binary file object opened for reading and writing
            (e.g. mode ``'r+b'``) that holds a pickled dict of key -> list.
        csv_f: Iterable of CSV lines, e.g. a file opened in text mode.

    Raises:
        IndexError: If a non-empty CSV row has fewer than two columns.
    """
    # NOTE: pickle.load can execute arbitrary code; only use this on pickle
    # files from a trusted source.
    term_dict = pickle.load(pickle_f)
    for row in csv.reader(csv_f):
        # csv.reader yields [] for blank lines; skip them instead of failing
        # on row[0].
        if not row:
            continue
        key, value = row[0], row[1]
        if key not in term_dict:
            term_dict[key] = [value]
        elif value not in term_dict[key]:
            term_dict[key] = term_dict[key] + [value]
    # After load() the stream is positioned at the end of the old pickle.
    # Rewind so the new pickle overwrites it rather than being appended after
    # it (a later load from the start would otherwise return the stale dict).
    pickle_f.seek(0)
    # Save with the highest protocol for better performance.
    pickle.dump(term_dict, pickle_f, pickle.HIGHEST_PROTOCOL)
    # Drop leftover bytes in case the new pickle is shorter than the old one.
    pickle_f.truncate()
|
35 |
+
|
36 |
+
|
37 |
+
# Demo: build a term dictionary from a CSV file, then export it to another CSV.
# NOTE(review): these statements run every time the module is imported;
# consider moving them under an `if __name__ == "__main__":` guard.
term_dict_sc2 = {}
# newline='' is what the csv module expects of file objects passed to
# csv.reader / csv.writer (avoids newline translation issues).
with open("./finetune_data/dict_enzh.csv", 'r', encoding='utf-8', newline='') as f:
    update_dict_csv(term_dict_sc2, f)

with open("../test.csv", "w", encoding='utf-8', newline='') as w:
    export_dict_csv(term_dict_sc2, w)

# To load a pickled dictionary back, call pickle.load(f) on a file opened in
# binary mode.
|