cetinca Hobson committed on
Commit
9554084
0 Parent(s):

Duplicate from TangibleAI/mathtext-nlu

Browse files

Co-authored-by: Hobson Lane <Hobson@users.noreply.huggingface.co>

Files changed (5) hide show
  1. .gitattributes +27 -0
  2. .gitignore +100 -0
  3. README.md +13 -0
  4. app.py +179 -0
  5. requirements.txt +4 -0
.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.onnx filter=lfs diff=lfs merge=lfs -text
13
+ *.ot filter=lfs diff=lfs merge=lfs -text
14
+ *.parquet filter=lfs diff=lfs merge=lfs -text
15
+ *.pb filter=lfs diff=lfs merge=lfs -text
16
+ *.pt filter=lfs diff=lfs merge=lfs -text
17
+ *.pth filter=lfs diff=lfs merge=lfs -text
18
+ *.rar filter=lfs diff=lfs merge=lfs -text
19
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
21
+ *.tflite filter=lfs diff=lfs merge=lfs -text
22
+ *.tgz filter=lfs diff=lfs merge=lfs -text
23
+ *.wasm filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Temporary and binary files
2
+ venv
3
+ *.bak
4
+ *~
5
+ *.py[cod]
6
+ *.so
7
+ *.cfg
8
+ !.isort.cfg
9
+ *.orig
10
+ *.log
11
+ *.pot
12
+ .pytest_cache/*
13
+ __pycache__/*
14
+ .DS_*
15
+ .cache/*
16
+ .*.swp
17
+ .ipynb_checkpoints
18
+ */.ipynb_checkpoints/*
19
+ .coverage*
20
+ cache_dir/
21
+
22
+
23
+ # pypi package setup files
24
+ # pyproject must not be in root dir for `pip install -e .` to work
25
+ # pyproject.toml
26
+ !scripts/pyproject.toml
27
+ !setup.cfg
28
+
29
+ # Internal working cache for poetry
30
+ poetry.lock
31
+
32
+
33
+ # Project files
34
+ .ropeproject
35
+ .project
36
+ .pydevproject
37
+ .settings
38
+ .idea
39
+ .vscode
40
+ tags
41
+ *.code-workspace
42
+ *.sublime-workspace
43
+
44
+ # Package files
45
+ *.egg
46
+ *.eggs/
47
+ .installed.cfg
48
+ *.egg-info
49
+
50
+ # Unittest and coverage
51
+ htmlcov/*
52
+ .coverage
53
+ .tox
54
+ junit.xml
55
+ coverage.xml
56
+ .pytest_cache/
57
+ pytest-results*
58
+
59
+ # Build and docs folder/files
60
+ build/*
61
+ dist/*
62
+ sdist/*
63
+ docs/api/*
64
+ docs/_rst/*
65
+ docs/_build/*
66
+ cover/*
67
+ runs/*
68
+ MANIFEST
69
+
70
+ # Mac
71
+ __MACOSX
72
+
73
+ # KDE
74
+ .directory
75
+
76
+ # BigData and Models
77
+ db.sqlite3
78
+ *.bz2
79
+ *.zip
80
+ *.pkl
81
+ *.npy
82
+ **/data/qa-models/*
83
+ **/data/models/pulse/*
84
+ **/data/predictions*
85
+ **/data/squad/*
86
+ **/data/testsets/accuracy_report*
87
+ **/albert-large*
88
+ **/data/corpora/wikipedia/wikipedia-titles*
89
+
90
+ # Per-project virtualenvs
91
+ .venv*/
92
+
93
+ # docs html
94
+ docs/**/*.html
95
+
96
+ # secrets
97
+ .env
98
+ .bash_env
99
+ **/*secret*
100
+ **/*private*
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: MathText
3
+ app_file: app.py
4
+ sdk: gradio
5
+ sdk_version: 3.0.2
6
+ license: agpl-3.0
7
+ duplicated_from: TangibleAI/mathtext-nlu
8
+ ---
9
+
10
+ ## MathText NLU
11
+
12
+ Natural Language Understanding for math symbols, digits, and words with a Gradio user interface and REST API.
13
+
app.py ADDED
@@ -0,0 +1,179 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import inspect
2
+ import json
3
+ import logging
4
+ import os
5
+ import gradio as gr
6
+ from gradio import routes
7
+ import spacy # noqa
8
+ from typing import List, Type
9
+
10
# Sentinel value reported when a text cannot be converted to an integer.
TOKENS2INT_ERROR_INT = 32202

# Root logger (no name passed to getLogger).
log = logging.getLogger()

# Number words for 0..19; a word's position in the list is its value.
ONES = [
    "zero",
    "one",
    "two",
    "three",
    "four",
    "five",
    "six",
    "seven",
    "eight",
    "nine",
    "ten",
    "eleven",
    "twelve",
    "thirteen",
    "fourteen",
    "fifteen",
    "sixteen",
    "seventeen",
    "eighteen",
    "nineteen",
]

# Per-character substitutions: hyphen and underscore become a space, and the
# decimal strings "0".."19" map to the space-padded word from ONES.
# NOTE(review): replace_chars looks characters up one at a time, so the
# two-character keys "10".."19" can never match there - confirm intent.
CHAR_MAPPING = {
    "-": " ",
    "_": " ",
    **{str(value): f" {word} " for value, word in enumerate(ONES)},
}

# Per-token substitutions keyed by the *integer* 0..19.
# NOTE(review): tokens produced by str.split() are strings, so they never
# equal these int keys - confirm whether str keys were intended.
TOKEN_MAPPING = {value: f" {word} " for value, word in enumerate(ONES)}
28
+
29
# Raw text of the BQ_JSON environment variable. The subscript lookup raises
# KeyError at import time when the variable is not set. The value is parsed
# with json.loads further down, where the page text is assembled.
BQ_JSON = os.environ['BQ_JSON']
30
+
31
+
32
def tokenize(text):
    """Split ``text`` on runs of whitespace and return the pieces as a list."""
    pieces = text.split()
    return pieces
34
+
35
+
36
def detokenize(tokens):
    """Join ``tokens`` into one string, separated by single spaces."""
    separator = ' '
    return separator.join(tokens)
38
+
39
+
40
def replace_tokens(tokens, token_mapping=TOKEN_MAPPING):
    """Return a new list with every mapped token swapped for its replacement.

    A token that is a key of ``token_mapping`` is replaced by the mapped
    value; any other token is passed through unchanged.
    """
    replaced = []
    for tok in tokens:
        replaced.append(token_mapping.get(tok, tok))
    return replaced
42
+
43
+
44
def replace_chars(text, char_mapping=CHAR_MAPPING):
    """Return ``text`` with each character substituted via ``char_mapping``.

    Characters that are not keys of ``char_mapping`` are kept as they are.
    """
    pieces = [char_mapping.get(ch, ch) for ch in text]
    return ''.join(pieces)
46
+
47
+
48
def _default_numwords():
    """Build the default lookup of number word -> ``(scale, increment)``."""
    tens = ["twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
    scales = ["hundred", "thousand", "million", "billion", "trillion"]

    table = {"and": (1, 0)}
    for idx, word in enumerate(ONES):
        table[word] = (1, idx)
    # "twenty" is 2 * 10, so counting starts at 2. This avoids empty-string
    # placeholders, which would otherwise become a legal "word" worth 10.
    for idx, word in enumerate(tens, start=2):
        table[word] = (1, idx * 10)
    # "hundred" multiplies by 10**2; each later scale word by 10**(3 * idx).
    for idx, word in enumerate(scales):
        table[word] = (10 ** (idx * 3 or 2), 0)
    return table


def tokens2int(tokens, numwords=None):
    """Convert a sequence of English number-word tokens to a decimal string.

    Every token updates the running group as
    ``current = current * scale + increment``. After a scale word larger than
    one hundred the group is added to the total and reset.

    Args:
        tokens: iterable of number words, e.g. ``["forty", "two"]``.
        numwords: optional mapping of word -> ``(scale, increment)``. When
            omitted, the default English table is built. An empty mapping
            supplied by the caller is filled in place with that table.

    Returns:
        The value as a ``str`` of decimal digits (not an ``int``).

    Raises:
        ValueError: if a token is not a key of ``numwords``.

    >>> tokens2int(["nine"])
    '9'
    >>> tokens2int(["forty", "two"])
    '42'
    >>> tokens2int(["one", "hundred", "and", "forty", "two"])
    '142'
    """
    if numwords is None:
        # Build a fresh table instead of sharing one mutable default object.
        numwords = _default_numwords()
    elif not numwords:
        numwords.update(_default_numwords())

    current = result = 0
    for word in tokens:
        if word not in numwords:
            raise ValueError(f"Illegal word: {word}")

        scale, increment = numwords[word]
        current = current * scale + increment
        if scale > 100:
            result += current
            current = 0

    return str(result + current)
85
+
86
+
87
def text2int(text):
    """Convert ``text`` by chaining replace_chars, tokenize and tokens2int.

    Returns whatever ``tokens2int`` returns for the resulting tokens; any
    exception raised along the way propagates to the caller.
    """
    normalized = replace_chars(text)
    words = tokenize(normalized)
    return tokens2int(words)
89
+
90
+
91
def try_text2int(text):
    """Convert ``text`` to an integer string without ever raising.

    The input is coerced to ``str``, passed through ``replace_chars`` and
    ``tokenize``, and converted once with ``tokens2int``.

    Returns:
        The converted value as a ``str``, or ``str(TOKENS2INT_ERROR_INT)``
        when any step raises. The error and the offending input are logged.
    """
    text = str(text)
    try:
        # tokens2int already returns the digits as a string. Feeding that
        # string back into tokens2int would iterate over its characters, none
        # of which is a number word, so the conversion runs exactly once.
        intstr = tokens2int(tokenize(replace_chars(text)))
    except Exception as e:
        log.error(str(e))
        log.error(f'User input: {text}')
        intstr = TOKENS2INT_ERROR_INT
    return str(intstr)
100
+
101
+
102
def try_text2int_preprocessed(text):
    """Convert ``text`` to an integer string, with token replacement first.

    The text goes through ``replace_chars``, ``tokenize`` and
    ``replace_tokens``; if that raises, the error is logged and a plain
    whitespace split of the text is used instead. The tokens are then handed
    to ``tokens2int``.

    Returns:
        The result of ``tokens2int``, or ``str(TOKENS2INT_ERROR_INT)`` when
        the conversion raises (the error is logged).
    """
    raw = str(text)
    try:
        cleaned = replace_chars(str(raw))
        words = replace_tokens(tokenize(cleaned))
    except Exception as err:
        log.error(str(err))
        words = raw.split()

    try:
        return tokens2int(words)
    except Exception as err:
        log.error(str(err))
        return str(TOKENS2INT_ERROR_INT)
115
+
116
+
117
def get_types(cls_set: List[Type], component: str):
    """Extract a description and a type name from each class docstring.

    For ``component == "input"`` the second docstring line is parsed; for any
    other value the last line is parsed. From that line the description is
    the text after the final ``:``, and the type name is the text between the
    last ``(`` and the first ``)``.

    Returns:
        A ``(docset, types)`` pair of lists, in the order of ``cls_set``.
    """
    # Index of the docstring line to parse: 1 for inputs, last line otherwise.
    line_no = 1 if component == "input" else -1

    docset = []
    types = []
    for cls in cls_set:
        described = inspect.getdoc(cls).split("\n")[line_no]
        docset.append(described.split(":")[-1])
        types.append(described.split(")")[0].split("(")[-1])
    return docset, types
133
+
134
+
135
# Replace gradio.routes.get_types with the local get_types defined above.
# NOTE(review): presumably a workaround for how gradio 3.0 parses component
# docstrings - confirm against the pinned gradio version.
routes.get_types = get_types

# Build the page: one input textbox, two buttons, one output textbox and a
# Markdown section describing the REST API.
with gr.Blocks() as html_block:
    gr.Markdown("# Gradio Blocks (3.0) with REST API")
    textbox_input = gr.Textbox(
        value="forty-two",
        label="Input number words:",
    )
    button_text2int = gr.Button("text2int")
    button_text2int_preprocessed = gr.Button("text2int with preprocessing")
    textbox_output = gr.Textbox(
        value="42",
        label="Output integer:"
    )
    # Both buttons read the same input textbox and write the same output
    # textbox; they differ only in the conversion function they call.
    button_text2int.click(try_text2int, inputs=[textbox_input], outputs=[textbox_output])
    button_text2int_preprocessed.click(try_text2int_preprocessed, inputs=[textbox_input], outputs=[textbox_output])
    # NOTE(review): the "type" field parsed from BQ_JSON is appended to the
    # rendered text, so part of that environment value is shown on the page -
    # confirm this is intended and not a debugging leftover.
    gr.Markdown(r"""

## API

You can select which function to run using the `fn_index` argument:

```python
import requests

requests.post(
url="https://Hobson-gradio-rest-api.hf.space/api/predict/", json={"data": ["one hundred forty-two"], "fn_index": 0}
).json()
```

Or using `curl`:

```bash
curl -X POST https://Hobson-gradio-rest-api.hf.space/api/predict/ -H 'Content-Type: application/json' -d '{"data": ["one hundred forty-two"], "fn_index": 0}'
```
""" + f"{json.loads(BQ_JSON)['type']}")

# This Interface wraps a no-op lambda. Below it is only read for its
# input_components / output_components, which are copied onto html_block.
interface = gr.Interface(lambda: None, inputs=[textbox_input], outputs=[textbox_output])

# NOTE(review): these attributes are presumably what gradio's API route
# expects to find on the app object - confirm for the pinned version.
html_block.input_components = interface.input_components
html_block.output_components = interface.output_components
html_block.examples = None
html_block.predict_durations = []

# Start the app when the module runs; the return value is kept in bapp.
bapp = html_block.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ spacy
2
+ pandas
3
+ pandas-gbq
4
+ gradio>=3.0.2,<3.1.0