Spaces:
Sleeping
Sleeping
File size: 3,475 Bytes
7e380c3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
from flask import Flask, request, jsonify, render_template
import re
from collections import Counter
# Flask application object; the @app.route decorators in this module
# register their handlers on it.
app = Flask(__name__)
def processData(fileName, encoding="utf-8"):
    """Read a text file and return its words, lower-cased, in file order.

    Args:
        fileName: Path of the text file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the result does not depend on the platform's locale encoding.

    Returns:
        A list of strings: every maximal run of regex "word" characters
        (letters, digits, underscore) found in the lower-cased file content.
        Duplicates are kept.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid in ``encoding``.
    """
    with open(fileName, encoding=encoding) as f:
        text = f.read().lower()
    return re.findall(r'\w+', text)
# Load the corpus once at import time: every lower-cased word from
# text_data.txt, duplicates included.
wordList = processData("text_data.txt")
# Distinct corpus words, used for membership tests when filtering candidates.
vocabulary = set(wordList)
def getCount(wordList):
    """Tally how often each item occurs in ``wordList``.

    Args:
        wordList: Iterable of hashable items (words).

    Returns:
        A ``collections.Counter`` mapping each distinct item to its number
        of occurrences.
    """
    return Counter(wordList)
# Occurrence count of every corpus word, computed once at import time.
wordCountDictionary = getCount(wordList)
def deleteLetter(word):
    """Return every string obtained by removing one character from ``word``.

    Args:
        word: The string to edit.

    Returns:
        A list with one entry per character position, in left-to-right
        order of the removed position. Duplicates are kept; an empty
        ``word`` yields an empty list.
    """
    return [word[:pos] + word[pos + 1:] for pos in range(len(word))]
def replaceLetter(word):
    """Return the strings made by replacing one character with a-z.

    Args:
        word: The string to edit.

    Returns:
        A sorted list of the distinct strings obtained by substituting a
        single position of ``word`` with one of the 26 lowercase ASCII
        letters. ``word`` itself is never included. An empty ``word``
        yields an empty list.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    candidates = {
        word[:pos] + ch + word[pos + 1:]
        for pos in range(len(word))
        for ch in alphabet
    }
    # discard() rather than remove(): the word is absent from the candidates
    # when it is empty or contains no lowercase a-z character, and remove()
    # would raise KeyError in that case.
    candidates.discard(word)
    return sorted(candidates)
def insertLetter(word):
    """Return every string made by inserting one a-z letter into ``word``.

    Args:
        word: The string to edit.

    Returns:
        A list of ``26 * (len(word) + 1)`` strings, ordered by insertion
        position (left to right) and then by inserted letter (a to z).
        Duplicates are kept.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    results = []
    for pos in range(len(word) + 1):
        head, tail = word[:pos], word[pos:]
        results.extend(head + ch + tail for ch in alphabet)
    return results
def getProbablity(wordCountDictionary):
    """Convert word counts into relative frequencies.

    Args:
        wordCountDictionary: Mapping of word -> occurrence count.

    Returns:
        A dict mapping each word to ``count / total``, where ``total`` is
        the sum of all counts. An empty mapping yields an empty dict.
    """
    total = sum(wordCountDictionary.values())
    return {
        token: count / total
        for token, count in wordCountDictionary.items()
    }
# Relative frequency of every corpus word (its count divided by the total
# number of words), computed once at import time.
probablity = getProbablity(wordCountDictionary)
def switchLetter(word):
    """Return every string made by swapping two adjacent characters.

    Args:
        word: The string to edit.

    Returns:
        A list with one entry per adjacent pair, ordered by the position of
        the pair's first character. Words shorter than two characters yield
        an empty list.
    """
    return [
        word[:i] + word[i + 1] + word[i] + word[i + 2:]
        for i in range(len(word) - 1)
    ]
def editOneLetter(word, allowSwitches=True):
    """Collect all strings one edit away from ``word``.

    The edits are single-character deletion, replacement and insertion,
    plus adjacent-character swaps when ``allowSwitches`` is true.

    Args:
        word: The string to edit.
        allowSwitches: Whether to include adjacent-character swaps.

    Returns:
        A set containing the union of the candidates produced by each edit
        helper.
    """
    edits = [deleteLetter(word)]
    if allowSwitches:
        edits.append(switchLetter(word))
    edits.append(replaceLetter(word))
    edits.append(insertLetter(word))
    return set().union(*edits)
def editTwoLetters(word, allowSwitches=True):
    """Collect all strings two edits away from ``word``.

    Applies ``editOneLetter`` to ``word`` and then again to every non-empty
    result of that first pass.

    Args:
        word: The string to edit.
        allowSwitches: Forwarded to both ``editOneLetter`` passes.

    Returns:
        A set with the union of all second-pass candidates.
    """
    twoAway = set()
    for intermediate in editOneLetter(word, allowSwitches=allowSwitches):
        # Empty intermediates (from deleting a one-letter word) are skipped.
        if not intermediate:
            continue
        twoAway.update(editOneLetter(intermediate, allowSwitches=allowSwitches))
    return twoAway
def getCorrections(word, probablity, vocabulary):
    """Suggest vocabulary words for ``word`` together with their probability.

    Candidates are chosen by the first rule that yields anything:
      1. ``word`` itself, if it is already in ``vocabulary``;
      2. vocabulary words one edit away from ``word``;
      3. vocabulary words two edits away from ``word``.

    Args:
        word: The (possibly misspelled) word to correct.
        probablity: Mapping of vocabulary word -> probability.
        vocabulary: Collection of known words.

    Returns:
        A list of ``[suggestion, probability]`` pairs, ordered by descending
        probability and then alphabetically. Empty if nothing matches.

    Raises:
        KeyError: If a suggestion is missing from ``probablity``.
    """
    if word and word in vocabulary:
        # Wrap the known word in a list: calling list() on the bare string
        # would split it into single characters.
        suggestions = [word]
    else:
        suggestions = (
            editOneLetter(word).intersection(vocabulary)
            or editTwoLetters(word).intersection(vocabulary)
        )
    # Sort explicitly so the result does not depend on set iteration order.
    ranked = sorted(suggestions, key=lambda s: (-probablity[s], s))
    return [[s, probablity[s]] for s in ranked]
@app.route('/')
def index():
    """Serve the landing page by rendering the ``index.html`` template."""
    return render_template('index.html')
@app.route('/correct', methods=['GET'])
def correct_word():
    """Return spelling suggestions for the ``word`` query parameter as JSON.

    Responds with HTTP 400 and an ``error`` message when ``word`` is missing
    or empty. Otherwise responds with a JSON array of
    ``{"word": ..., "probability": ...}`` objects, ordered by descending
    probability.
    """
    query = request.args.get('word')
    if not query:
        return jsonify({"error": "Please provide a word."}), 400
    ranked = sorted(
        getCorrections(query, probablity, vocabulary),
        key=lambda pair: pair[1],
        reverse=True,
    )
    payload = []
    for suggestion, score in ranked:
        payload.append({"word": suggestion, "probability": score})
    return jsonify(payload)
# Start Flask's built-in server only when this file is run as a script.
if __name__ == '__main__':
    # NOTE(review): debug=True turns on Flask's debug mode; confirm this is
    # not enabled anywhere the server is reachable by untrusted clients.
    app.run(debug=True)
|