cetinca committed on
Commit
28c4823
1 Parent(s): 9de3e37

Copied original content

Browse files
Files changed (3) hide show
  1. app.py +23 -5
  2. app2.py +157 -0
  3. mathtext-nlu-api.py +6 -6
app.py CHANGED
@@ -50,7 +50,6 @@ def replace_chars(text, char_mapping=CHAR_MAPPING):
50
 
51
  def tokens2int(tokens, numwords={}):
52
  """ Convert an English str containing number words into an int
53
-
54
  >>> text2int("nine")
55
  9
56
  >>> text2int("forty two")
@@ -151,7 +150,26 @@ with gr.Blocks() as html_block:
151
  )
152
  button_text2int.click(try_text2int, inputs=[textbox_input], outputs=[textbox_output])
153
  button_text2int_preprocessed.click(try_text2int_preprocessed, inputs=[textbox_input], outputs=[textbox_output])
154
-
155
-
156
- interface = gr.Interface(fn=try_text2int_preprocessed, inputs=[textbox_input], outputs=[textbox_output])
157
- interface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  def tokens2int(tokens, numwords={}):
52
  """ Convert an English str containing number words into an int
 
53
  >>> text2int("nine")
54
  9
55
  >>> text2int("forty two")
 
150
  )
151
  button_text2int.click(try_text2int, inputs=[textbox_input], outputs=[textbox_output])
152
  button_text2int_preprocessed.click(try_text2int_preprocessed, inputs=[textbox_input], outputs=[textbox_output])
153
+ gr.Markdown(r"""
154
+ ## API
155
+ You can select which function to run using the `fn_index` argument:
156
+ ```python
157
+ import requests
158
+ requests.post(
159
url="https://cetinca-mathtext-nlu.hf.space/run/predict", json={"data": ["one hundred forty-two"], "fn_index": 0}
160
+ ).json()
161
+ ```
162
+ Or using `curl`:
163
+ ```bash
164
+ curl -X POST https://cetinca-mathtext-nlu.hf.space/run/predict/ -H 'Content-Type: application/json' -d '{"data": ["one hundred forty-two"], "fn_index": 0}'
165
+ ```
166
+ """ + f"{json.loads(BQ_JSON)['type']}")
167
+
168
+ interface = gr.Interface(lambda: None, inputs=[textbox_input], outputs=[textbox_output])
169
+
170
+ html_block.input_components = interface.input_components
171
+ html_block.output_components = interface.output_components
172
+ html_block.examples = None
173
+ html_block.predict_durations = []
174
+
175
+ bapp = html_block.launch()
app2.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import inspect
2
+ import json
3
+ import logging
4
+ import os
5
+ import gradio as gr
6
+ from gradio import routes
7
+ import spacy # noqa
8
+ from typing import List, Type
9
+ from dotenv import load_dotenv
10
+
11
+ load_dotenv()
12
+
13
TOKENS2INT_ERROR_INT = 32202  # sentinel value returned when number-word parsing fails

log = logging.getLogger()

# Number words for 0-19; list index == numeric value.
ONES = [
    "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
    "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
    "sixteen", "seventeen", "eighteen", "nineteen",
]

# token_mapping = json.load(open('str_mapping.json'))
# Per-character normalization table used by replace_chars: hyphen/underscore
# become spaces, and digit characters expand to space-padded number words
# (e.g. "4" -> " four ").
CHAR_MAPPING = {
    "-": " ",
    "_": " ",
}
# NOTE(review): this also creates keys "10".."19", which replace_chars (a
# per-character scan) can never look up -- only "0".."9" are reachable.
CHAR_MAPPING.update((str(i), word) for i, word in enumerate([" " + s + " " for s in ONES]))

# NOTE(review): keys here are ints 0-19 (dict(enumerate(...))), but the tokens
# passed to replace_tokens are strings, so lookups never match -- confirm
# whether str(i) keys were intended.
TOKEN_MAPPING = dict(enumerate([" " + s + " " for s in ONES]))

# Required credential blob; raises KeyError at import time if BQ_JSON is unset.
BQ_JSON = os.environ['BQ_JSON']
33
+
34
+
35
def tokenize(text):
    """Break *text* into a list of whitespace-separated tokens."""
    tokens = text.split()
    return tokens
37
+
38
+
39
def detokenize(tokens):
    """Join *tokens* back into one space-separated string."""
    separator = ' '
    return separator.join(tokens)
41
+
42
+
43
def replace_tokens(tokens, token_mapping=TOKEN_MAPPING):
    """Map each token through *token_mapping*, leaving unknown tokens as-is.

    BUG FIX: the default TOKEN_MAPPING is built with ``dict(enumerate(...))``
    and therefore has *int* keys, while tokens arriving from ``tokenize`` are
    *str* -- so the original lookup never matched anything.  We now fall back
    to an int-keyed lookup for digit tokens; plain str-key lookups behave
    exactly as before, so callers passing their own str-keyed mapping are
    unaffected.
    """
    replaced = []
    for tok in tokens:
        if tok in token_mapping:
            replaced.append(token_mapping[tok])
        elif tok.isdigit() and int(tok) in token_mapping:
            # digit token, int-keyed mapping (the module default)
            replaced.append(token_mapping[int(tok)])
        else:
            replaced.append(tok)
    return replaced
45
+
46
+
47
def replace_chars(text, char_mapping=CHAR_MAPPING):
    """Rewrite *text* one character at a time via *char_mapping*; characters
    without an entry pass through unchanged."""
    return ''.join(map(lambda ch: char_mapping.get(ch, ch), text))
49
+
50
+
51
def tokens2int(tokens, numwords={}):
    """Fold a list of English number-word tokens into the integer they spell.

    Despite the name, the result is returned as a *str* (the UI handlers
    feed it straight into a Textbox).

    ``numwords`` is a deliberate mutable default: the word -> (scale,
    increment) table is built on the first call and reused as a cache on
    every later call.

    Raises a plain ``Exception`` for any token not in the table.

    >>> tokens2int(["nine"])
    '9'
    >>> tokens2int("forty two".split())
    '42'
    >>> tokens2int("one hundred forty two".split())
    '142'
    """
    if not numwords:

        # tens[0]/tens[1] are placeholders -- values 0-19 come from ONES
        tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]

        scales = ["hundred", "thousand", "million", "billion", "trillion"]

        # each entry maps word -> (scale, increment)
        numwords["and"] = (1, 0)  # "one hundred and two": "and" is a no-op
        for idx, word in enumerate(ONES):
            numwords[word] = (1, idx)
        for idx, word in enumerate(tens):
            numwords[word] = (1, idx * 10)
        for idx, word in enumerate(scales):
            # hundred -> 10**2, thousand -> 10**3, million -> 10**6, ...
            numwords[word] = (10 ** (idx * 3 or 2), 0)

    current = result = 0

    for word in tokens:
        if word not in numwords:
            raise Exception("Illegal word: " + word)

        scale, increment = numwords[word]
        current = current * scale + increment
        if scale > 100:
            # a scale word above "hundred" closes a group
            # (e.g. "two hundred three THOUSAND"): bank it and start over
            result += current
            current = 0

    return str(result + current)
88
+
89
+
90
def text2int(text):
    """Translate an English number phrase (or digit string) into its
    integer-string form via char normalization -> tokenization -> folding."""
    normalized = replace_chars(text)
    return tokens2int(tokenize(normalized))
92
+
93
+
94
def try_text2int(text):
    """Best-effort wrapper around the text -> int pipeline.

    Returns the parsed integer as a string, or
    ``str(TOKENS2INT_ERROR_INT)`` when parsing fails (the failure is logged
    together with the offending user input).
    """
    text = str(text)
    try:
        # BUG FIX: the original applied tokens2int twice.  The inner call
        # returns a digit string (e.g. "42"); the outer call then iterated
        # its characters, none of which are number words, so EVERY input
        # raised and produced the error sentinel.  One application is correct.
        intstr = tokens2int(tokenize(replace_chars(text)))
    except Exception as e:  # report parse failures via the sentinel value
        log.error(str(e))
        log.error(f'User input: {text}')
        intstr = TOKENS2INT_ERROR_INT
    return str(intstr)
103
+
104
+
105
def try_text2int_preprocessed(text):
    """Like try_text2int, but runs replace_tokens over the token stream first.

    Falls back to a plain whitespace split if preprocessing fails, and to
    ``str(TOKENS2INT_ERROR_INT)`` if the final parse fails; failures are
    logged either way.
    """
    text = str(text)
    try:
        cleaned = replace_chars(str(text))
        tokens = replace_tokens(tokenize(cleaned))
    except Exception as e:
        # preprocessing failed -- degrade to a naive split
        log.error(str(e))
        tokens = text.split()
    try:
        result = tokens2int(tokens)
    except Exception as e:
        log.error(str(e))
        result = str(TOKENS2INT_ERROR_INT)
    return result
118
+
119
+
120
def get_types(cls_set: List[Type], component: str):
    """Replacement for gradio ``routes.get_types``: extract a description and
    a type name from each component class docstring.

    For ``component == "input"`` the second docstring line is parsed;
    otherwise the last line is.  Returns ``(docset, types)`` in class order.
    """
    docset = []
    types = []
    for cls in cls_set:
        lines = inspect.getdoc(cls).split("\n")
        # input components document their value on line 1; outputs on the last line
        line = lines[1] if component == "input" else lines[-1]
        docset.append(line.split(":")[-1])
        types.append(line.split(")")[0].split("(")[-1])
    return docset, types
136
+
137
+
138
# Monkey-patch gradio's route type introspection with our docstring parser
# (the stock implementation chokes on these component docstrings).
routes.get_types = get_types

with gr.Blocks() as html_block:
    gr.Markdown("# Gradio Blocks (3.0) with REST API")
    textbox_input = gr.Textbox(
        value="forty-two",
        label="Input number words:",
    )
    button_text2int = gr.Button("text2int")
    button_text2int_preprocessed = gr.Button("text2int with preprocessing")
    textbox_output = gr.Textbox(
        value="42",
        label="Output integer:"
    )
    # Click-handler registration order defines the REST API's fn_index values:
    # fn_index 0 -> try_text2int, fn_index 1 -> try_text2int_preprocessed.
    button_text2int.click(try_text2int, inputs=[textbox_input], outputs=[textbox_output])
    button_text2int_preprocessed.click(try_text2int_preprocessed, inputs=[textbox_input], outputs=[textbox_output])


# NOTE(review): only this plain Interface is launched; `html_block` built
# above is never served -- confirm which app is meant to run.
interface = gr.Interface(fn=try_text2int_preprocessed, inputs=[textbox_input], outputs=[textbox_output])
interface.launch()
mathtext-nlu-api.py CHANGED
@@ -1,18 +1,18 @@
1
  import os
2
- from dotenv import load_dotenv
3
  import requests
 
4
 
5
  load_dotenv()
6
 
7
- # data = input("Enter a number as text:\n").split()
8
- data = "four two".split()
9
 
10
  HUG_ACC_TOKEN = os.environ.get("HUG_ACC_TOKEN")
11
 
12
  response = requests.post(
13
  url="https://cetinca-mathtext-nlu.hf.space/run/predict",
14
- json={"data": data, "fn_index": 0},
15
  # headers={'Authorization': HUG_ACC_TOKEN},
16
- )
17
 
18
- print(response.status_code)
 
1
"""Tiny CLI client for the hosted MathText NLU gradio Space."""
import os

import requests
from dotenv import load_dotenv

load_dotenv()

# The gradio REST API expects inputs wrapped in a list; one prompt from stdin.
data = [input("Enter a number as text:\n")]

# Only needed if the Authorization header below is re-enabled.
HUG_ACC_TOKEN = os.environ.get("HUG_ACC_TOKEN")

# fn_index=1 selects the preprocessed text2int handler on the Space.
response = requests.post(
    url="https://cetinca-mathtext-nlu.hf.space/run/predict",
    json={"data": data, "fn_index": 1},
    # headers={'Authorization': HUG_ACC_TOKEN},
).json()

print(response.get("data", "No response!"))