Spaces:
Sleeping
Sleeping
worldqwq
committed on
Commit
•
44646a0
1
Parent(s):
bc31908
Attempt spellcheck fix with levenshtein threshold
Browse files
Former-commit-id: 4a3b6a8f34894f3bfbe27d14e793161bdb0c02c9
- SRT.py +11 -1
- pipeline.py +1 -1
SRT.py
CHANGED
@@ -371,8 +371,18 @@ class SRT_script():
|
|
371 |
[real_word, pos] = self.get_real_word(word)
|
372 |
if not dict.check(word[:pos]):
|
373 |
suggest = term_spellDict.suggest(real_word)
|
374 |
-
if suggest: # relax spell check
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
375 |
new_word = word.replace(word[:pos],suggest[0])
|
|
|
|
|
376 |
else:
|
377 |
new_word = word
|
378 |
ready_words[i] = new_word
|
|
|
371 |
[real_word, pos] = self.get_real_word(word)
|
372 |
if not dict.check(word[:pos]):
|
373 |
suggest = term_spellDict.suggest(real_word)
|
374 |
+
if suggest and enchant.utils.levenshtein(word, suggest[0]) < (len(word)+len(suggest[0]))/4: # relax spell check
|
375 |
+
|
376 |
+
#with open("dislog.log","a") as log:
|
377 |
+
# if not os.path.exists("dislog.log"):
|
378 |
+
# log.write("word \t suggest \t levenshtein \n")
|
379 |
+
# log.write(word + "\t" + suggest[0] + "\t" + str(enchant.utils.levenshtein(word, suggest[0]))+'\n')
|
380 |
+
print(word + ":" + suggest[0] + ":---:levenshtein:" + str(enchant.utils.levenshtein(word, suggest[0])))
|
381 |
+
|
382 |
+
|
383 |
new_word = word.replace(word[:pos],suggest[0])
|
384 |
+
else:
|
385 |
+
new_word = word
|
386 |
else:
|
387 |
new_word = word
|
388 |
ready_words[i] = new_word
|
pipeline.py
CHANGED
@@ -255,7 +255,7 @@ def main():
|
|
255 |
|
256 |
# SRT class preprocess
|
257 |
srt.form_whole_sentence()
|
258 |
-
|
259 |
srt.correct_with_force_term()
|
260 |
srt.write_srt_file_src(srt_file_en)
|
261 |
script_input = srt.get_source_only()
|
|
|
255 |
|
256 |
# SRT class preprocess
|
257 |
srt.form_whole_sentence()
|
258 |
+
srt.spell_check_term()
|
259 |
srt.correct_with_force_term()
|
260 |
srt.write_srt_file_src(srt_file_en)
|
261 |
script_input = srt.get_source_only()
|