Spaces:
Build error
Build error
catasaurus
committed on
Commit
•
dc4f50e
1
Parent(s):
fc247ed
Added better spell check
Browse files
app.py
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
import gradio as gr
|
2 |
-
|
3 |
#import os
|
4 |
#os.environ['KMP_DUPLICATE_LIB_OK']='True'
|
5 |
#import spacy
|
6 |
-
|
|
|
7 |
# Change this according to what words should be corrected to
|
8 |
SPELL_CORRECT_MIN_CHAR_DIFF = 2
|
9 |
|
@@ -25,6 +25,39 @@ TOKEN_MAPPING = {
|
|
25 |
"and": " ",
|
26 |
"oh":"0",
|
27 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
def find_char_diff(a, b):
|
30 |
# Finds the character difference between two str objects by counting the occurrences of every character. Not edit distance.
|
@@ -94,12 +127,14 @@ def convert_word_to_int(in_word, numwords={}):
|
|
94 |
return int(in_word)
|
95 |
except ValueError:
|
96 |
pass
|
|
|
97 |
# Spell correction using find_char_diff
|
98 |
char_diffs = [find_char_diff(in_word, i) for i in ONES + tens + scales]
|
99 |
min_char_diff = min(char_diffs)
|
100 |
if min_char_diff <= SPELL_CORRECT_MIN_CHAR_DIFF:
|
101 |
return char_diffs.index(min_char_diff)
|
102 |
-
|
|
|
103 |
|
104 |
def tokens2int(tokens):
|
105 |
# Takes a list of tokens and returns an int representation of them
|
@@ -187,5 +222,6 @@ def text2int(text):
|
|
187 |
return tokens2int(tokenize(text))
|
188 |
|
189 |
|
|
|
190 |
# Build a Gradio Interface that calls text2int, taking "text" input and producing "text" output.
iface = gr.Interface(fn=text2int, inputs="text", outputs="text")
|
191 |
# Launch the interface built above; this runs at import time of the module.
iface.launch()
|
|
|
1 |
import gradio as gr
|
|
|
2 |
#import os
|
3 |
#os.environ['KMP_DUPLICATE_LIB_OK']='True'
|
4 |
#import spacy
|
5 |
+
import re
|
6 |
+
from collections import Counter
|
7 |
# Change this according to what words should be corrected to
|
8 |
SPELL_CORRECT_MIN_CHAR_DIFF = 2
|
9 |
|
|
|
25 |
"and": " ",
|
26 |
"oh":"0",
|
27 |
}
|
28 |
+
def words(text):
    """Return every run of word characters found in `text`, lowercased.

    The text is lowercased first and then scanned with the regex `\\w+`,
    so punctuation and whitespace act as separators. Matches are returned
    as a list in the order they appear.
    """
    lowered = text.lower()
    return re.findall(r'\w+', lowered)
|
29 |
+
|
30 |
+
# Frequency table of every word in numbers.txt (lowercased by words()).
# The file is opened in a `with` block so the handle is closed as soon as
# the text has been read, and decoded explicitly as UTF-8 so the result
# does not depend on the platform's default locale encoding.
with open('numbers.txt', encoding='utf-8') as _numbers_file:
    WORDS = Counter(words(_numbers_file.read()))
|
31 |
+
|
32 |
+
def P(word, N=sum(WORDS.values())):
    """Probability of `word`: its count in WORDS divided by N.

    Args:
        word: The word to look up in WORDS.
        N: The normalising total. Defaults to the sum of all counts in
            WORDS, evaluated once when this function is defined.

    Returns:
        WORDS[word] / N, or 0.0 when N is zero (for example when WORDS is
        empty), instead of raising ZeroDivisionError.
    """
    if not N:
        # An empty corpus has no probability mass to distribute.
        return 0.0
    return WORDS[word] / N
|
35 |
+
|
36 |
+
def correction(word):
    """Most probable spelling correction for `word`.

    Picks the candidate with the highest P(). candidates() may return a
    set, whose iteration order for strings can differ between interpreter
    runs, and max() keeps the first maximal item it meets. The candidates
    are therefore sorted first, so that ties in probability are always
    resolved the same way (the alphabetically smallest candidate wins).
    """
    ordered = sorted(candidates(word))
    return max(ordered, key=P)
|
39 |
+
|
40 |
+
def candidates(word):
    """Generate possible spelling corrections for `word`.

    Tries, in order, the word itself, everything one edit away, and
    everything two edits away, returning the first non-empty set of known
    words. Later stages are only computed when the earlier ones found
    nothing. If no stage matches, the result is the list `[word]`.
    """
    exact = known([word])
    if exact:
        return exact

    one_edit = known(edits1(word))
    if one_edit:
        return one_edit

    two_edits = known(edits2(word))
    if two_edits:
        return two_edits

    return [word]
|
43 |
+
|
44 |
+
def known(words):
    """Return the set of items from `words` that are present in WORDS."""
    return {candidate for candidate in words if candidate in WORDS}
|
47 |
+
|
48 |
+
def edits1(word):
    """Return the set of all strings exactly one simple edit from `word`.

    An edit is a single-character deletion, a swap of two adjacent
    characters, a replacement of one character by a lowercase ASCII
    letter, or an insertion of a lowercase ASCII letter at any position.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz'
    results = set()
    for cut in range(len(word) + 1):
        head, tail = word[:cut], word[cut:]

        # Insertions are possible at every split point, including the end.
        for ch in alphabet:
            results.add(head + ch + tail)

        if not tail:
            continue

        # Deletion of the character right after the split point.
        results.add(head + tail[1:])

        # Replacement of that same character by each letter.
        for ch in alphabet:
            results.add(head + ch + tail[1:])

        # Transposition needs at least two characters after the split.
        if len(tail) > 1:
            results.add(head + tail[1] + tail[0] + tail[2:])
    return results
|
57 |
+
|
58 |
+
def edits2(word):
    """Lazily yield every string that is two edits away from `word`.

    The first round of edits is computed immediately; the second round is
    produced on demand as the returned generator is consumed. Duplicates
    are not removed.
    """
    first_round = edits1(word)

    def _second_round():
        for once_edited in first_round:
            yield from edits1(once_edited)

    return _second_round()
|
61 |
|
62 |
def find_char_diff(a, b):
|
63 |
# Finds the character difference between two str objects by counting the occurrences of every character. Not edit distance.
|
|
|
127 |
return int(in_word)
|
128 |
except ValueError:
|
129 |
pass
|
130 |
+
"""
|
131 |
# Spell correction using find_char_diff
|
132 |
char_diffs = [find_char_diff(in_word, i) for i in ONES + tens + scales]
|
133 |
min_char_diff = min(char_diffs)
|
134 |
if min_char_diff <= SPELL_CORRECT_MIN_CHAR_DIFF:
|
135 |
return char_diffs.index(min_char_diff)
|
136 |
+
"""
|
137 |
+
return numwords[correction(in_word)]
|
138 |
|
139 |
def tokens2int(tokens):
|
140 |
# Takes a list of tokens and returns an int representation of them
|
|
|
222 |
return tokens2int(tokenize(text))
|
223 |
|
224 |
|
225 |
+
|
226 |
# Build a Gradio Interface that calls text2int, taking "text" input and producing "text" output.
iface = gr.Interface(fn=text2int, inputs="text", outputs="text")
|
227 |
# Launch the interface built above; this runs at import time of the module.
iface.launch()
|