Spaces:
Sleeping
Sleeping
from flask import Flask, request, jsonify, render_template | |
import re | |
from collections import Counter | |
# Flask application object for this module.
app = Flask(__name__)
def processData(fileName):
    """Read a text file and return its words, lower-cased, in file order.

    Args:
        fileName: Path of the text file to read.

    Returns:
        list[str]: Every maximal run of word characters (letters, digits
        and underscore) found in the lower-cased file contents.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the token list does not depend on the
    # platform's default locale encoding.
    with open(fileName, encoding="utf-8") as f:
        text = f.read()
    return re.findall(r'\w+', text.lower())
# Tokenise the corpus file once, when the module is imported.
wordList = processData("text_data.txt")
# Distinct corpus words; used for membership tests and set intersections.
vocabulary = set(wordList)
def getCount(wordList):
    """Tally how many times each word occurs in ``wordList``.

    Args:
        wordList: Iterable of hashable items (words).

    Returns:
        collections.Counter: Mapping of word -> number of occurrences.
    """
    return Counter(wordList)
# Word -> occurrence count for the whole corpus.
wordCountDictionary = getCount(wordList)
def deleteLetter(word):
    """Return every string obtained by dropping one character from ``word``.

    Args:
        word: The string to edit.

    Returns:
        list[str]: One entry per character position, ordered by the index
        of the removed character. Duplicates are kept; an empty word
        yields an empty list.
    """
    return [word[:pos] + word[pos + 1:] for pos in range(len(word))]
def replaceLetter(word):
    """Return all strings made by replacing one character of ``word`` with a-z.

    Args:
        word: The string to edit.

    Returns:
        list[str]: The distinct replacement candidates in sorted order.
        ``word`` itself is never included. An empty word yields an empty
        list.
    """
    letters = 'abcdefghijklmnopqrstuvwxyz'
    candidates = {
        word[:pos] + ch + word[pos + 1:]
        for pos in range(len(word))
        for ch in letters
    }
    # discard() rather than remove(): the unchanged word is only generated
    # when it contains at least one a-z character, so it may be absent
    # (e.g. "", "1" or "HELLO") and remove() would raise KeyError.
    candidates.discard(word)
    return sorted(candidates)
def insertLetter(word):
    """Return every string made by inserting one a-z letter into ``word``.

    Args:
        word: The string to edit.

    Returns:
        list[str]: 26 candidates for each of the ``len(word) + 1`` insertion
        points, ordered by insertion point and then alphabetically by the
        inserted letter. Duplicates are kept.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    return [
        word[:pos] + ch + word[pos:]
        for pos in range(len(word) + 1)
        for ch in alphabet
    ]
def getProbablity(wordCountDictionary):
    """Convert word counts into relative frequencies.

    Args:
        wordCountDictionary: Mapping of word -> occurrence count.

    Returns:
        dict: Mapping of word -> count divided by the sum of all counts.
        An empty mapping yields an empty dict (no division takes place).
    """
    total = sum(wordCountDictionary.values())
    return {token: count / total for token, count in wordCountDictionary.items()}
# Word -> relative frequency in the corpus; used to rank suggestions.
probablity = getProbablity(wordCountDictionary)
def switchLetter(word):
    """Return every string made by swapping two adjacent characters.

    Args:
        word: The string to edit.

    Returns:
        list[str]: One entry per adjacent pair, ordered by the index of the
        pair's first character. Words shorter than two characters yield an
        empty list.
    """
    return [
        word[:i] + word[i + 1] + word[i] + word[i + 2:]
        for i in range(len(word) - 1)
    ]
def editOneLetter(word, allowSwitches=True):
    """Collect every candidate one edit away from ``word``.

    Args:
        word: The string to edit.
        allowSwitches: When true, adjacent-character swaps from
            ``switchLetter`` are included alongside the other edits.

    Returns:
        set[str]: Union of the results of ``deleteLetter``,
        (optionally) ``switchLetter``, ``replaceLetter`` and
        ``insertLetter`` for ``word``.
    """
    edits = set(deleteLetter(word))
    if allowSwitches:
        edits |= set(switchLetter(word))
    edits |= set(replaceLetter(word))
    edits |= set(insertLetter(word))
    return edits
def editTwoLetters(word, allowSwitches=True):
    """Collect every candidate two edits away from ``word``.

    Applies ``editOneLetter`` to each non-empty result of
    ``editOneLetter(word)`` and merges the outcomes.

    Args:
        word: The string to edit.
        allowSwitches: Forwarded to both rounds of ``editOneLetter``.

    Returns:
        set[str]: All strings produced by the second round of edits.
    """
    twice_edited = set()
    for once_edited in editOneLetter(word, allowSwitches=allowSwitches):
        # Empty intermediates are not expanded.
        if not once_edited:
            continue
        twice_edited.update(editOneLetter(once_edited, allowSwitches=allowSwitches))
    return twice_edited
def getCorrections(word, probablity, vocabulary, max_suggestions=6):
    """Suggest the most probable vocabulary words for ``word``.

    Candidates are chosen in order of preference: the word itself when it
    is already in the vocabulary, otherwise vocabulary words one edit
    away, otherwise vocabulary words two edits away.

    Args:
        word: The word to correct.
        probablity: Mapping of vocabulary word -> probability.
        vocabulary: Collection of known words.
        max_suggestions: Maximum number of suggestions to return.

    Returns:
        list[list]: ``[suggestion, probability]`` pairs, highest
        probability first (ties broken alphabetically), truncated to
        ``max_suggestions``. Empty when no candidate is found.
    """
    if word in vocabulary:
        # Wrap the word in a list: calling list() on the bare string would
        # split it into single characters.
        candidates = [word]
    else:
        candidates = (
            editOneLetter(word).intersection(vocabulary)
            or editTwoLetters(word).intersection(vocabulary)
        )
    # A candidate missing from the probability table ranks last instead of
    # raising KeyError.
    scored = [[candidate, probablity.get(candidate, 0)] for candidate in candidates]
    # Secondary alphabetical key makes the order of equally probable
    # suggestions deterministic (set iteration order is not).
    scored.sort(key=lambda pair: (-pair[1], pair[0]))
    return scored[:max_suggestions]
def index():
    """Render and return the ``index.html`` template."""
    # NOTE(review): no @app.route decorator is visible on this function in
    # this view of the file; confirm the view is registered with the app.
    page = render_template('index.html')
    return page
def correct_word():
    """Return spelling suggestions for the ``word`` query parameter as JSON.

    Returns:
        A JSON array of ``{"word": ..., "probability": ...}`` objects in
        the order produced by ``getCorrections``, or a JSON error object
        with HTTP status 400 when no usable word is supplied.
    """
    # NOTE(review): no @app.route decorator is visible on this function in
    # this view of the file; confirm the view is registered with the app.
    # The vocabulary is built from lower-cased text, so normalise the query
    # the same way; otherwise "The" would never match "the". Stripping also
    # rejects whitespace-only input.
    myWord = (request.args.get('word') or '').strip().lower()
    if not myWord:
        return jsonify({"error": "Please provide a word."}), 400
    # getCorrections already returns its results sorted by probability,
    # highest first, so no further sorting is needed here.
    suggestions = getCorrections(myWord, probablity, vocabulary)
    response = [
        {"word": suggestion, "probability": probability}
        for suggestion, probability in suggestions
    ]
    return jsonify(response)
if __name__ == '__main__':
    # Start Flask's built-in server when this file is executed as a script.
    # NOTE(review): debug=True turns on the interactive debugger and the
    # reloader; confirm this entry point is never used for a public
    # deployment.
    app.run(debug=True)