Spaces:
Sleeping
Sleeping
DWizard
committed on
Commit
•
d26fe45
1
Parent(s):
ac9ac97
prune and increase robustness
Browse files
Former-commit-id: 3c421a28145741c17e1a7d09339b205da541996e
- dict_util.py +5 -7
- src/srt_util/srt.py +3 -3
dict_util.py
CHANGED
@@ -55,12 +55,8 @@ with open("../test.csv", "w", encoding='utf-8') as w:
|
|
55 |
# pickle.load(f)
|
56 |
|
57 |
|
58 |
-
def form_dict(
|
59 |
final_dict = {}
|
60 |
-
with open(src_path, 'r', encoding="utf-8") as file:
|
61 |
-
src_dict = list(csv.reader(file, delimiter=","))
|
62 |
-
with open(tgt_path, 'r', encoding="utf-8") as file:
|
63 |
-
tgt_dict = list(csv.reader(file, delimiter="," ))
|
64 |
for idx, value in enumerate(src_dict):
|
65 |
for item in value:
|
66 |
final_dict.update({item:tgt_dict[idx]})
|
@@ -69,8 +65,10 @@ def form_dict(src_path,tgt_path) -> dict:
|
|
69 |
|
70 |
class term_dict(dict):
|
71 |
def __init__(self, path, src_lang, tgt_lang) -> None:
|
72 |
-
|
73 |
-
|
|
|
|
|
74 |
super().__init__(form_dict(src_dict, tgt_dict))
|
75 |
|
76 |
|
|
|
55 |
# pickle.load(f)
|
56 |
|
57 |
|
58 |
+
def form_dict(src_dict:list, tgt_dict:list) -> dict:
|
59 |
final_dict = {}
|
|
|
|
|
|
|
|
|
60 |
for idx, value in enumerate(src_dict):
|
61 |
for item in value:
|
62 |
final_dict.update({item:tgt_dict[idx]})
|
|
|
65 |
|
66 |
class term_dict(dict):
|
67 |
def __init__(self, path, src_lang, tgt_lang) -> None:
|
68 |
+
with open(f"{path}/{src_lang}.csv", 'r', encoding="utf-8") as file:
|
69 |
+
src_dict = list(csv.reader(file, delimiter=","))
|
70 |
+
with open(f"{path}/{tgt_lang}.csv", 'r', encoding="utf-8") as file:
|
71 |
+
tgt_dict = list(csv.reader(file, delimiter="," ))
|
72 |
super().__init__(form_dict(src_dict, tgt_dict))
|
73 |
|
74 |
|
src/srt_util/srt.py
CHANGED
@@ -160,13 +160,13 @@ class SrtScript(object):
|
|
160 |
self.segments = [SrtSegment(self.src_lang, self.tgt_lang, seg) for seg in segments]
|
161 |
|
162 |
if self.domain != "General":
|
163 |
-
if os.path.exists(f"{dict_path}/{self.domain}")
|
|
|
164 |
# TODO: load dictionary
|
165 |
self.dict = dict_util.term_dict(f"{dict_path}/{self.domain}", src_lang, tgt_lang)
|
166 |
-
print(self.dict["robo"])
|
167 |
...
|
168 |
else:
|
169 |
-
logging.error(f"domain {self.domain} doesn't exist, fallback to general domain, this will disable correct_with_force_term and spell_check_term")
|
170 |
self.domain = "General"
|
171 |
|
172 |
|
|
|
160 |
self.segments = [SrtSegment(self.src_lang, self.tgt_lang, seg) for seg in segments]
|
161 |
|
162 |
if self.domain != "General":
|
163 |
+
if os.path.exists(f"{dict_path}/{self.domain}") and\
|
164 |
+
os.path.exists(f"{dict_path}/{self.domain}/{src_lang}.csv") and os.path.exists(f"{dict_path}/{self.domain}/{tgt_lang}.csv" ):
|
165 |
# TODO: load dictionary
|
166 |
self.dict = dict_util.term_dict(f"{dict_path}/{self.domain}", src_lang, tgt_lang)
|
|
|
167 |
...
|
168 |
else:
|
169 |
+
logging.error(f"domain {self.domain} or related dictionary({src_lang} or {tgt_lang}) doesn't exist, fallback to general domain, this will disable correct_with_force_term and spell_check_term")
|
170 |
self.domain = "General"
|
171 |
|
172 |
|