autocorrect / app.py
Elalimy's picture
'init'
7e380c3
raw
history blame
No virus
3.48 kB
from flask import Flask, request, jsonify, render_template
import re
from collections import Counter
# Flask application object; the @app.route decorators further down register
# their view functions on it, so it must exist before they are evaluated.
app = Flask(__name__)
def processData(fileName):
    """Read a text file and return its lowercase word tokens.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A list of every ``\\w+`` match (letters, digits, underscore) found in
        the lowercased file contents, in order of appearance. Duplicates are
        kept so the list can be used for frequency counting.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the token list does not depend on the
    # platform's default locale encoding.
    with open(fileName, encoding="utf-8") as f:
        fileNameData = f.read().lower()
    return re.findall(r'\w+', fileNameData)
# Load the corpus once at import time. The path is relative, so it is resolved
# against the process's current working directory.
wordList = processData("text_data.txt")
# Distinct words of the corpus, used for membership tests on candidates.
vocabulary = set(wordList)
def getCount(wordList):
    """Count how many times each word occurs in ``wordList``.

    Args:
        wordList: Iterable of hashable word tokens.

    Returns:
        A ``collections.Counter`` mapping each distinct word to its number of
        occurrences.
    """
    counts = Counter()
    counts.update(wordList)
    return counts
# Word -> occurrence count over the whole corpus, computed once at import time.
wordCountDictionary = getCount(wordList)
def deleteLetter(word):
    """Return every string obtained by removing one character from ``word``.

    Args:
        word: The string to edit.

    Returns:
        A list with one entry per character position, ordered by the index of
        the removed character. Duplicates are kept. Empty for an empty word.
    """
    return [word[:pos] + word[pos + 1:] for pos in range(len(word))]
def replaceLetter(word):
    """Return every string obtained by replacing one character of ``word``.

    Each position is substituted with each lowercase ASCII letter. The input
    word itself is never part of the result, and each candidate appears once.

    Args:
        word: The string to edit.

    Returns:
        A list of distinct candidates in generation order (position first,
        then replacement letter). Empty for an empty word.
    """
    letter = 'abcdefghijklmnopqrstuvwxyz'
    # Seeding ``seen`` with the word filters out the "replace a letter with
    # itself" no-op. It cannot raise for inputs without lowercase letters,
    # unlike removing the word from a set that may not contain it.
    seen = {word}
    replaceList = []
    for pos in range(len(word)):
        prefix, suffix = word[:pos], word[pos + 1:]
        for l in letter:
            candidate = prefix + l + suffix
            if candidate not in seen:
                seen.add(candidate)
                replaceList.append(candidate)
    return replaceList
def insertLetter(word):
    """Return every string obtained by inserting one letter into ``word``.

    Args:
        word: The string to edit.

    Returns:
        A list of ``26 * (len(word) + 1)`` strings, ordered by insertion
        position and then by inserted lowercase ASCII letter. Duplicates are
        kept.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    insertList = []
    for pos in range(len(word) + 1):
        head, tail = word[:pos], word[pos:]
        for ch in alphabet:
            insertList.append(head + ch + tail)
    return insertList
def getProbablity(wordCountDictionary):
    """Convert absolute word counts into relative frequencies.

    Args:
        wordCountDictionary: Mapping of word -> occurrence count.

    Returns:
        A dict mapping each word to ``count / total``, where ``total`` is the
        sum of all counts. An empty mapping yields an empty dict.
    """
    total = sum(wordCountDictionary.values())
    return {term: wordCountDictionary[term] / total for term in wordCountDictionary}
# Word -> relative frequency in the corpus; used to rank correction candidates.
probablity = getProbablity(wordCountDictionary)
def switchLetter(word):
    """Return every string obtained by swapping two adjacent characters.

    Args:
        word: The string to edit.

    Returns:
        A list with one entry per adjacent pair, ordered by the index of the
        pair's first character. Empty when ``word`` has fewer than two
        characters.
    """
    swapped = []
    for pos in range(len(word) - 1):
        swapped.append(word[:pos] + word[pos + 1] + word[pos] + word[pos + 2:])
    return swapped
def editOneLetter(word, allowSwitches=True):
    """Collect the strings produced by applying one edit to ``word``.

    Args:
        word: The string to edit.
        allowSwitches: When true, adjacent-character swaps are included in
            addition to deletions, replacements and insertions.

    Returns:
        A set containing the combined output of the individual edit helpers.
    """
    candidates = set(deleteLetter(word))
    if allowSwitches:
        candidates |= set(switchLetter(word))
    candidates |= set(replaceLetter(word))
    candidates |= set(insertLetter(word))
    return candidates
def editTwoLetters(word, allowSwitches=True):
    """Collect the strings produced by applying two successive edits.

    Args:
        word: The string to edit.
        allowSwitches: Forwarded to ``editOneLetter`` for both passes.

    Returns:
        A set holding the union of ``editOneLetter`` applied to every
        non-empty result of ``editOneLetter(word)``.
    """
    secondPass = set()
    for firstEdit in editOneLetter(word, allowSwitches=allowSwitches):
        # Empty intermediates (e.g. deleting the only character) are skipped.
        if not firstEdit:
            continue
        secondPass |= editOneLetter(firstEdit, allowSwitches=allowSwitches)
    return secondPass
def getCorrections(word, probablity, vocabulary):
    """Suggest vocabulary words for ``word`` together with their probability.

    Candidates are chosen from the first non-empty source, in this order:
    the word itself if it is in the vocabulary, vocabulary words one edit
    away, then vocabulary words two edits away.

    Args:
        word: The word to correct.
        probablity: Mapping of vocabulary word -> relative frequency.
        vocabulary: Set of known words.

    Returns:
        A list of ``[suggestion, probability]`` pairs in no particular order.
        Empty when nothing within two edits is in the vocabulary.

    Raises:
        KeyError: If a suggestion is missing from ``probablity``.
    """
    if word in vocabulary:
        # Wrap the known word in a list. Calling list() on the string itself
        # would split it into single characters.
        suggestions = [word]
    else:
        suggestions = list(
            editOneLetter(word).intersection(vocabulary)
            or editTwoLetters(word).intersection(vocabulary)
        )
    return [[s, probablity[s]] for s in reversed(suggestions)]
@app.route('/')
def index():
    """Serve the landing page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page
@app.route('/correct', methods=['GET'])
def correct_word():
    """Return spelling suggestions for the ``word`` query parameter as JSON.

    Responds with HTTP 400 and an error object when ``word`` is missing or
    blank. Otherwise responds with a JSON array of
    ``{"word": ..., "probability": ...}`` objects sorted by descending
    probability.
    """
    # The vocabulary is built from lowercased text, so normalise the query the
    # same way; surrounding whitespace is not part of the word.
    myWord = request.args.get('word', '').strip().lower()
    if not myWord:
        return jsonify({"error": "Please provide a word."}), 400
    temporaryCorrections = getCorrections(myWord, probablity, vocabulary)
    # Most probable suggestion first.
    temporaryCorrections.sort(key=lambda x: x[1], reverse=True)
    response = [{"word": word, "probability": probability} for word, probability in temporaryCorrections]
    return jsonify(response)
# Start Flask's built-in server only when this file is executed directly.
# NOTE(review): debug=True enables Flask's debug mode (reloader and
# interactive debugger) - confirm this entry point is not used in production.
if __name__ == '__main__':
    app.run(debug=True)