worldqwq committed on
Commit
44646a0
1 Parent(s): bc31908

Attempt spellcheck fix with levenshtein threshold

Browse files

Former-commit-id: 4a3b6a8f34894f3bfbe27d14e793161bdb0c02c9

Files changed (2) hide show
  1. SRT.py +11 -1
  2. pipeline.py +1 -1
SRT.py CHANGED
@@ -371,8 +371,18 @@ class SRT_script():
371
  [real_word, pos] = self.get_real_word(word)
372
  if not dict.check(word[:pos]):
373
  suggest = term_spellDict.suggest(real_word)
374
- if suggest: # relax spell check
 
 
 
 
 
 
 
 
375
  new_word = word.replace(word[:pos],suggest[0])
 
 
376
  else:
377
  new_word = word
378
  ready_words[i] = new_word
 
371
  [real_word, pos] = self.get_real_word(word)
372
  if not dict.check(word[:pos]):
373
  suggest = term_spellDict.suggest(real_word)
374
+ if suggest and enchant.utils.levenshtein(word, suggest[0]) < (len(word)+len(suggest[0]))/4: # relax spell check
375
+
376
+ #with open("dislog.log","a") as log:
377
+ # if not os.path.exists("dislog.log"):
378
+ # log.write("word \t suggest \t levenshtein \n")
379
+ # log.write(word + "\t" + suggest[0] + "\t" + str(enchant.utils.levenshtein(word, suggest[0]))+'\n')
380
+ print(word + ":" + suggest[0] + ":---:levenshtein:" + str(enchant.utils.levenshtein(word, suggest[0])))
381
+
382
+
383
  new_word = word.replace(word[:pos],suggest[0])
384
+ else:
385
+ new_word = word
386
  else:
387
  new_word = word
388
  ready_words[i] = new_word
pipeline.py CHANGED
@@ -255,7 +255,7 @@ def main():
255
 
256
  # SRT class preprocess
257
  srt.form_whole_sentence()
258
- # srt.spell_check_term()
259
  srt.correct_with_force_term()
260
  srt.write_srt_file_src(srt_file_en)
261
  script_input = srt.get_source_only()
 
255
 
256
  # SRT class preprocess
257
  srt.form_whole_sentence()
258
+ srt.spell_check_term()
259
  srt.correct_with_force_term()
260
  srt.write_srt_file_src(srt_file_en)
261
  script_input = srt.get_source_only()