fire-coml-summer-2022 / Autocorrect /autocorrectreal.py
Steven Zhang
autocorrect merged, finished training spanish
21c0ae2
raw
history blame
856 Bytes
# -*- coding: utf-8 -*-
"""autocorrectreal.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1aH5mYp1dxyn55XMjtVUllBvg37nqGVir
"""
import re
from collections import Counter
import numpy as np
import pandas as pd
import textdistance
# Build the module-level vocabulary `w` once, at import time.
# The path is relative, so it is resolved against the current working
# directory of the process that imports this module.
# The encoding is stated explicitly so that accented words are decoded the
# same way on every platform instead of depending on the locale default.
with open('Autocorrect/words.txt', 'r', encoding='utf-8') as f:
    # Lower-case the whole text so every vocabulary entry is lower-case.
    file_name_data = f.read().lower()
# Raw string: '\w' inside a normal string literal is an invalid escape
# sequence and triggers a warning on recent Python versions.
# `w` keeps one entry per occurrence, in file order (duplicates included).
w = re.findall(r'\w+', file_name_data)
from nltk.metrics.distance import edit_distance
def edit(input_sentence, vocabulary=None, *, distance=None):
    """Replace each unknown word of a sentence with its closest known word.

    The sentence is split on whitespace; any punctuation attached to a
    token is therefore treated as part of that token. A token whose
    lower-cased form is in the vocabulary is kept exactly as written.
    Every other token is replaced by the vocabulary word with the smallest
    distance to the lower-cased token; ties are broken by picking the
    alphabetically smallest candidate word.

    Args:
        input_sentence: Text to correct.
        vocabulary: Optional iterable of known (lower-case) words. When
            omitted, the module-level word list ``w`` is used.
        distance: Optional callable ``distance(a, b) -> number`` used to
            score candidates. When omitted, the ``edit_distance`` function
            imported at module level is used.

    Returns:
        The corrected tokens joined by single spaces.

    Raises:
        ValueError: If a token is unknown and the vocabulary is empty,
            because there is no candidate to choose from.
    """
    words = w if vocabulary is None else vocabulary
    measure = edit_distance if distance is None else distance

    # Build the lookup set once per call: O(1) membership tests instead of
    # a linear scan of the word list for every token, and no duplicate
    # candidates to score.
    known = set(words)
    # Remember corrections so a repeated misspelling is scored only once.
    corrections = {}

    tokens = input_sentence.split()
    for position, token in enumerate(tokens):
        # Compare in lower case throughout: the vocabulary is lower-case,
        # so scoring the raw token would count every capital letter as an
        # edit and skew the choice of the closest word.
        key = token.lower()
        if key in known:
            continue
        if key not in corrections:
            # Tuples compare by distance first, then by word, so the result
            # does not depend on the iteration order of the set.
            corrections[key] = min(
                (measure(key, candidate), candidate) for candidate in known
            )[1]
        tokens[position] = corrections[key]
    return ' '.join(tokens)