cetinca commited on
Commit
46f5320
0 Parent(s):

Initial commit

Browse files
.gitattributes ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tflite filter=lfs diff=lfs merge=lfs -text
29
+ *.tgz filter=lfs diff=lfs merge=lfs -text
30
+ *.wasm filter=lfs diff=lfs merge=lfs -text
31
+ *.xz filter=lfs diff=lfs merge=lfs -text
32
+ *.zip filter=lfs diff=lfs merge=lfs -text
33
+ *.zst filter=lfs diff=lfs merge=lfs -text
34
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # PyInstaller
30
+ # Usually these files are written by a python script from a template
31
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
32
+ *.manifest
33
+ *.spec
34
+
35
+ # Installer logs
36
+ pip-log.txt
37
+ pip-delete-this-directory.txt
38
+
39
+ # Unit test / coverage reports
40
+ htmlcov/
41
+ .tox/
42
+ .nox/
43
+ .coverage
44
+ .coverage.*
45
+ .cache
46
+ nosetests.xml
47
+ coverage.xml
48
+ *.cover
49
+ *.py,cover
50
+ .hypothesis/
51
+ .pytest_cache/
52
+ cover/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ .pybuilder/
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ # For a library or package, you might want to ignore these files since the code is
87
+ # intended to run in multiple environments; otherwise, check them in:
88
+ # .python-version
89
+
90
+ # pipenv
91
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
93
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
94
+ # install all needed dependencies.
95
+ #Pipfile.lock
96
+
97
+ # poetry
98
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
100
+ # commonly ignored for libraries.
101
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102
+ #poetry.lock
103
+
104
+ # pdm
105
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106
+ #pdm.lock
107
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108
+ # in version control.
109
+ # https://pdm.fming.dev/#use-with-ide
110
+ .pdm.toml
111
+
112
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113
+ __pypackages__/
114
+
115
+ # Celery stuff
116
+ celerybeat-schedule
117
+ celerybeat.pid
118
+
119
+ # SageMath parsed files
120
+ *.sage.py
121
+
122
+ # Environments
123
+ .env
124
+ .venv
125
+ env/
126
+ venv/
127
+ ENV/
128
+ env.bak/
129
+ venv.bak/
130
+
131
+ # Spyder project settings
132
+ .spyderproject
133
+ .spyproject
134
+
135
+ # Rope project settings
136
+ .ropeproject
137
+
138
+ # mkdocs documentation
139
+ /site
140
+
141
+ # mypy
142
+ .mypy_cache/
143
+ .dmypy.json
144
+ dmypy.json
145
+
146
+ # Pyre type checker
147
+ .pyre/
148
+
149
+ # pytype static type analyzer
150
+ .pytype/
151
+
152
+ # Cython debug symbols
153
+ cython_debug/
154
+
155
+ # PyCharm
156
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
159
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160
+ .idea/
161
+
162
+ *history_sentiment*
163
+ *history_text2int*
.gitlab-ci.yml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ run_tests:
2
+ image: python:3.10-slim-buster
3
+ before_script:
4
+ - echo "Image build has started!"
5
+ script:
6
+ - pip install -r requirements.txt && pytest --verbose
Dockerfile ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://huggingface.co/docs/hub/spaces-sdks-docker-first-demo
2
+
3
+ FROM python:3.9
4
+
5
+ WORKDIR /code
6
+
7
+ COPY ./requirements.txt /code/requirements.txt
8
+
9
+ RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
10
+
11
+ RUN useradd -m -u 1000 user
12
+
13
+ USER user
14
+
15
+ ENV HOME=/home/user \
16
+ PATH=/home/user/.local/bin:$PATH
17
+
18
+ WORKDIR $HOME/app
19
+
20
+ COPY --chown=user . $HOME/app
21
+
22
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Mathtext Fastapi
3
+ emoji: 🐨
4
+ colorFrom: blue
5
+ colorTo: red
6
+ sdk: docker
7
+ pinned: false
8
+ license: agpl-3.0
9
+ ---
10
+
11
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """FastAPI endpoint
2
+ To run locally use 'uvicorn app:app --host localhost --port 7860'
3
+ """
4
+
5
+ from fastapi import FastAPI, Request
6
+ from fastapi.responses import JSONResponse
7
+ from fastapi.staticfiles import StaticFiles
8
+ from fastapi.templating import Jinja2Templates
9
+ from pydantic import BaseModel
10
+
11
+ from modules.sentiment import sentiment
12
+ from modules.text2int import text2int
13
+
14
+ app = FastAPI()
15
+
16
+ app.mount("/static", StaticFiles(directory="static"), name="static")
17
+
18
+ templates = Jinja2Templates(directory="templates")
19
+
20
+
21
# Request body schema used by the POST endpoints in this module.
class Text(BaseModel):
    # Text to process; defaults to an empty string when the field is omitted.
    content: str = ""
23
+
24
+
25
@app.get("/")
def home(request: Request):
    """Render the landing page from the ``home.html`` template."""
    context = {"request": request}
    return templates.TemplateResponse("home.html", context)
28
+
29
+
30
@app.post("/hello")
def hello(content: Text = None):
    """Return a greeting built from the posted text.

    Args:
        content: Request body; its ``content`` field is inserted into the
            greeting. The parameter defaults to ``None``, so it may be
            absent.

    Returns:
        JSONResponse with a single ``message`` key.
    """
    # The parameter defaults to None; fall back to the model's own default
    # ("") instead of failing on attribute access.
    name = content.content if content is not None else ""
    return JSONResponse(content={"message": f"Hello {name}!"})
34
+
35
+
36
@app.post("/sentiment-analysis")
def sentiment_analysis_ep(content: Text = None):
    """Run sentiment analysis on the posted text.

    Args:
        content: Request body; its ``content`` field is passed to
            ``sentiment``. The parameter defaults to ``None``, so it may be
            absent.

    Returns:
        JSONResponse whose ``message`` key holds the value returned by
        ``sentiment``.
    """
    # The parameter defaults to None; fall back to the model's own default
    # ("") instead of failing on attribute access.
    text = content.content if content is not None else ""
    ml_response = sentiment(text)
    return JSONResponse(content={"message": ml_response})
41
+
42
+
43
@app.post("/text2int")
def text2int_ep(content: Text = None):
    """Convert the posted number words into an integer.

    Args:
        content: Request body; its ``content`` field is passed to
            ``text2int``. The parameter defaults to ``None``, so it may be
            absent.

    Returns:
        JSONResponse whose ``message`` key holds the value returned by
        ``text2int``.
    """
    # The parameter defaults to None; fall back to the model's own default
    # ("") instead of failing on attribute access.
    text = content.content if content is not None else ""
    ml_response = text2int(text)
    return JSONResponse(content={"message": ml_response})
data/test_data_text2int.csv ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ input,output
2
+ fourteen,14
3
+ one thousand four hundred ninety two,1492
4
+ Fourteen Hundred Ninety-Two,1492
5
+ forteen,14
6
+ seventeen-thousand and seventy two,17072
7
+ two hundred and nine,209
8
+ # Following are the ones that the current text2int.py fails on,-
9
+ # ninety nine hundred and seventy seven,9977
10
+ # seven thousands,7000
11
+ # 2 hundreds,200
12
+ # 99 thousands and one,99001
13
+ # "forty-five thousand, seven hundred and nine",45709
14
+ # eighty eight hundred eighty,8880
15
+ # a hundred hundred,10000
16
+ # a hundred thousand,100000
17
+ # a hundred million,100000000
18
+ # nineteen ninety nine,1999
19
+ # forteen twenty seven,1427
20
+ # one nine eight five,1985
21
+ # nineteen eighty-five,1985
22
+ # six,6
23
+ # eighty,80
24
+ # eight three seven five three zero nine,8375309
25
+ # oh one,1
26
+ # six oh 1,601
27
+ # sex,6
28
+ # eight oh,80
29
+ # ate,8
30
+ # double eight,88
31
+ # eight three seven five three O nine,8375309
31
+ # eight three seven five three oh nine,8375309
32
+ # eight three seven five three 0 nine,8375309
33
+ # eight three seven five three oh ni-ee-ine,8375309
35
+ # two thousand ten,2010
36
+ # two thousand and ten,2010
37
+ # twelve million,12000000
38
+ # 8 billion,8000000000
39
+ # twenty ten,2010
40
+ # thirty-two hundred,3200
41
+ # nine,9
42
+ # forty two,42
43
+ # 1 2 three,123
modules/__init__.py ADDED
File without changes
modules/sentiment.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+
3
+ sentiment_obj = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
4
+
5
+
6
def sentiment(text):
    """Run the module-level ``sentiment_obj`` pipeline on *text* and return its output."""
    prediction = sentiment_obj(text)
    return prediction
modules/text2int.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spacy # noqa
2
+
3
+ # import os
4
+ # os.environ['KMP_DUPLICATE_LIB_OK']='True'
5
+ # import spacy
6
+
7
+ # Change this according to what words should be corrected to
8
+ SPELL_CORRECT_MIN_CHAR_DIFF = 2
9
+
10
+ TOKENS2INT_ERROR_INT = 32202
11
+
12
+ ONES = [
13
+ "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
14
+ "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
15
+ "sixteen", "seventeen", "eighteen", "nineteen",
16
+ ]
17
+
18
+ CHAR_MAPPING = {
19
+ "-": " ",
20
+ "_": " ",
21
+ "and": " ",
22
+ }
23
+ # CHAR_MAPPING.update((str(i), word) for i, word in enumerate([" " + s + " " for s in ONES]))
24
+ TOKEN_MAPPING = {
25
+ "and": " ",
26
+ "oh": "0",
27
+ }
28
+
29
+
30
def find_char_diff(a, b):
    """Return a character-count difference between *a* and *b* (not edit distance).

    For every distinct character of *a*, the absolute difference between its
    number of occurrences in *a* and in *b* is added up.

    NOTE: the measure is asymmetric. Characters that occur only in *b* are
    never counted, so ``find_char_diff("forteen", "fourteen")`` is 0 while
    ``find_char_diff("fourteen", "forteen")`` is 1.
    """
    tally_a = {}
    for ch in a:
        tally_a[ch] = tally_a.get(ch, 0) + 1

    tally_b = {}
    for ch in b:
        tally_b[ch] = tally_b.get(ch, 0) + 1

    # A character missing from b contributes its full count in a.
    return sum(abs(n - tally_b.get(ch, 0)) for ch, n in tally_a.items())
51
+
52
+
53
def tokenize(text):
    """Turn *text* into a list of per-word values from ``convert_word_to_int``.

    The text is lower-cased, passed character by character through
    ``replace_chars``, split on whitespace, passed through
    ``replace_tokens``, and every remaining word is handed to
    ``convert_word_to_int``. Tokens equal to a single space are dropped both
    before and after conversion.
    """
    lowered = text.lower()
    cleaned = ''.join(replace_chars(lowered))
    words = [word for word in replace_tokens(cleaned.split()) if word != ' ']
    converted = [convert_word_to_int(word) for word in words]
    return [value for value in converted if value != ' ']
67
+
68
+
69
def detokenize(tokens):
    """Join string *tokens* back into one space-separated string."""
    separator = ' '
    return separator.join(tokens)
71
+
72
+
73
def replace_tokens(tokens, token_mapping=TOKEN_MAPPING):
    """Return *tokens* with every token found in *token_mapping* replaced by its mapped value."""
    replaced = []
    for token in tokens:
        # Tokens without a mapping entry pass through unchanged.
        replaced.append(token_mapping.get(token, token))
    return replaced
75
+
76
+
77
def replace_chars(text, char_mapping=CHAR_MAPPING):
    """Return the characters of *text* as a list, each replaced via *char_mapping* when present.

    The lookup is per single character, so multi-character keys in the
    mapping never match.
    """
    replaced = []
    for char in text:
        replaced.append(char_mapping.get(char, char))
    return replaced
79
+
80
+
81
def convert_word_to_int(in_word, numwords=None):
    """Convert a single word (or digit string) into an int.

    Lookup order:
      1. exact match in *numwords*;
      2. ``int(in_word)`` for digit strings;
      3. spell correction: the known number word with the smallest
         ``find_char_diff`` is used when that difference does not exceed
         ``SPELL_CORRECT_MIN_CHAR_DIFF``.

    Args:
        in_word: The word to convert.
        numwords: Optional word -> int table. When it is ``None`` or empty,
            the built-in table (ones, tens, scales) is used; an empty dict
            passed by the caller is filled in place.

    Returns:
        The integer value, or ``None`` when the word cannot be matched.
    """
    tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
    scales = ["hundred", "thousand", "million", "billion", "trillion"]

    # Value of every known number word. The empty strings in `tens` only
    # exist to align list indices and are skipped.
    word_values = {}
    for idx, word in enumerate(ONES):
        word_values[word] = idx
    for idx, word in enumerate(tens):
        if word:
            word_values[word] = idx * 10
    for idx, word in enumerate(scales):
        # idx 0 ("hundred") -> 10**2, afterwards 10**3, 10**6, ...
        word_values[word] = 10 ** (idx * 3 or 2)

    if not numwords:
        if numwords is None:
            numwords = {}
        numwords.update(word_values)

    if in_word in numwords:
        return numwords[in_word]

    try:
        return int(in_word)
    except ValueError:
        pass

    # Spell correction using find_char_diff. Return the VALUE of the closest
    # word, not its position in the candidate list: the position only equals
    # the value for the entries of ONES.
    candidates = [word for word in ONES + tens + scales if word]
    char_diffs = [find_char_diff(in_word, word) for word in candidates]
    min_char_diff = min(char_diffs)
    if min_char_diff <= SPELL_CORRECT_MIN_CHAR_DIFF:
        # list.index returns the first minimum, so ties go to the earliest word.
        return word_values[candidates[char_diffs.index(min_char_diff)]]
    return None
106
+
107
+
108
def tokens2int(tokens):
    """Combine a list of numeric tokens into a single int.

    Each token is classified by magnitude: 1 for values <= 9, 2 for values
    <= 90, 3 for anything larger. Short inputs (at most three tokens) are
    folded left to right with add/multiply heuristics; longer inputs made
    only of class-1 tokens are concatenated as digits; all other inputs are
    processed as consecutive pairs.
    """
    kinds = [1 if tok <= 9 else 2 if tok <= 90 else 3 for tok in tokens]
    no_units = 1 not in kinds
    no_tens = 2 not in kinds
    no_scales = 3 not in kinds

    def glued():
        # Concatenate the decimal representations of all tokens.
        return int(''.join(str(tok) for tok in tokens))

    if len(tokens) <= 3:
        total = 0
        for pos, tok in enumerate(tokens):
            if (pos != 0 and kinds[pos] < kinds[pos - 1]
                    and total != tokens[pos - 1] and kinds[pos - 1] != 3):
                total += tokens[pos] + tokens[pos - 1]
            elif total <= tokens[pos] and total != 0:
                total *= tokens[pos]
            elif no_scales and no_units:
                total = glued()
                break
            elif kinds == [1, 1, 1]:
                # With at most three tokens this is the only way the class
                # string can contain '111' (and then no 2s or 3s exist).
                total = glued()
                break
            else:
                total += tok
        return total

    if no_scales and no_tens:
        return glued()

    # Longer mixed input: walk the tokens two at a time.
    total = 0
    for pos in range(1, len(tokens), 2):
        first, second = tokens[pos - 1], tokens[pos]
        if kinds[pos - 1] == kinds[pos]:
            # NOTE(review): the second token is written before the first
            # here -- confirm this ordering is intended.
            total += int(str(second) + str(first))
        elif kinds[pos - 1] > kinds[pos]:
            total += first + second
        else:
            total += first * second
    if len(tokens) % 2:
        # A trailing token without a partner is simply added.
        total += tokens[-1]
    return total
188
+
189
+
190
def text2int(text):
    """Convert *text* to an int by chaining ``tokenize`` and ``tokens2int``."""
    tokens = tokenize(text)
    return tokens2int(tokens)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi==0.74.*
2
+ requests==2.27.*
3
+ sentencepiece==0.1.*
4
+ torch==1.11.*
5
+ transformers==4.*
6
+ uvicorn[standard]==0.17.*
7
+ pydantic
8
+ spacy
9
+ pandas
10
+ matplotlib
11
+ pytest
scripts/__init__.py ADDED
File without changes
scripts/api_scaling.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """https://zetcode.com/python/concurrent-http-requests/"""
2
+
3
+ import asyncio
4
+ import random
5
+ import time
6
+ import pandas as pd
7
+ import httpx
8
+ from os.path import exists
9
+
10
+ NUMBER_OF_CALLS = 1
11
+
12
+ headers = {"Content-Type": "application/json; charset=utf-8"}
13
+
14
+ # base_url = "https://tangibleai-mathtext.hf.space/run/{endpoint}"
15
+ base_url = "http://localhost:7860/run/{endpoint}"
16
+
17
+ data_list_1 = {
18
+ "endpoint": "text2int",
19
+ "test_data": [
20
+ "one hundred forty five",
21
+ "twenty thousand nine hundred fifty",
22
+ "one hundred forty five",
23
+ "nine hundred eighty three",
24
+ "five million",
25
+ ]
26
+ }
27
+
28
+ data_list_2 = {
29
+ "endpoint": "text2int-preprocessed",
30
+ "test_data": [
31
+ "one hundred forty five",
32
+ "twenty thousand nine hundred fifty",
33
+ "one hundred forty five",
34
+ "nine hundred eighty three",
35
+ "five million",
36
+ ]
37
+ }
38
+ data_list_3 = {
39
+ "endpoint": "sentiment-analysis",
40
+ "test_data": [
41
+ "Totally agree",
42
+ "I like it",
43
+ "No more",
44
+ "I am not sure",
45
+ "Never",
46
+ ]
47
+ }
48
+
49
+
50
+ # async call to endpoint
51
async def call_api(url, data, call_number, number_of_calls):
    """POST *data* to *url* and return a dict describing the timed call.

    The payload is ``{"data": [data]}``. Timing uses ``time.perf_counter``
    and covers only the POST itself.
    """
    payload = {"data": [data]}
    async with httpx.AsyncClient() as client:
        started = time.perf_counter()  # perf_counter for a more precise result
        response = await client.post(url=url, headers=headers, json=payload, timeout=30)
        finished = time.perf_counter()
        record = {
            "endpoint": url.split("/")[-1],
            "test data": data,
            "status code": response.status_code,
            "response": response.json().get("data"),
            "call number": call_number,
            "number of calls": number_of_calls,
            "start": round(started, 4),
            "end": round(finished, 4),
            "delay": round(finished - started, 4),
        }
        return record
68
+
69
+
70
+ data_lists = [data_list_1, data_list_2, data_list_3]
71
+
72
+ results = []
73
+
74
+
75
async def main(number_of_calls):
    """Issue *number_of_calls* concurrent requests per entry of ``data_lists``.

    For each data list a random test input is picked for every call, the
    calls are awaited together, and their records are appended to the
    module-level ``results`` list.
    """
    for data_list in data_lists:
        pending = [
            call_api(
                base_url.format(endpoint=data_list["endpoint"]),
                random.choice(data_list["test_data"]),
                call_number,
                number_of_calls,
            )
            for call_number in range(1, number_of_calls + 1)
        ]
        results.extend(await asyncio.gather(*pending))
84
+
85
+
86
+
87
+ start = time.perf_counter()
88
+ asyncio.run(main(NUMBER_OF_CALLS))
89
+ end = time.perf_counter()
90
+ print(end-start)
91
+ df = pd.DataFrame(results)
92
+
93
+ if exists("call_history.csv"):
94
+ df.to_csv(path_or_buf="call_history.csv", mode="a", header=False, index=False)
95
+ else:
96
+ df.to_csv(path_or_buf="call_history.csv", mode="w", header=True, index=False)
scripts/api_scaling.sh ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #! /bin/env bash
2
+
3
+ LOG_FILE_NAME="call_history_bash.csv"
4
+
5
+ if [[ ! -f "$LOG_FILE_NAME" ]]; then
6
+ # Write the column names if the file does not exist
7
+ echo "student_id;active_students;endpoint;inputs;outputs;started;finished" >$LOG_FILE_NAME
8
+ fi
9
+
10
+ data_list_1() {
11
+ responses=(
12
+ "one hundred forty five"
13
+ "twenty thousand nine hundred fifty"
14
+ "one hundred forty five"
15
+ "nine hundred eighty three"
16
+ "five million"
17
+ )
18
+ echo "${responses[$1]}"
19
+ }
20
+
21
+ data_list_2() {
22
+ responses=(
23
+ "Totally agree"
24
+ "I like it"
25
+ "No more"
26
+ "I am not sure"
27
+ "Never"
28
+ )
29
+ echo "${responses[$1]}"
30
+ }
31
+
32
+ # endpoints: "text2int" "sentiment-analysis"
33
+ # selected endpoint to test
34
+ endpoint="sentiment-analysis"
35
+
36
+ create_random_delay() {
37
+ # creates a random delay for given arguments
38
+ echo "scale=8; $RANDOM/32768*$1" | bc
39
+ }
40
+
41
+ simulate_student() {
42
+ # Student simulator waits randomly between 0-10s after an interaction.
43
+ # Based on 100 interactions per student
44
+ for i in {1..100}; do
45
+
46
+ random_value=$((RANDOM % 5))
47
+ text=$(data_list_2 $random_value)
48
+ data='{"data": ["'$text'"]}'
49
+
50
+ start_=$(date +"%F %T.%6N")
51
+
52
+ url="https://tangibleai-mathtext.hf.space/run/$3"
53
+ response=$(curl --silent --connect-timeout 30 --max-time 30 -X POST "$url" -H 'Content-Type: application/json' -d "$data")
54
+
55
+ if [[ "$response" == *"Time-out"* ]]; then
56
+ echo "$response" >>bad_response.txt
57
+ response="504 Gateway Time-out"
58
+ elif [[ -z "$response" ]]; then
59
+ echo "No response" >>bad_response.txt
60
+ response="504 Gateway Time-out"
61
+ fi
62
+
63
+ end_=$(date +"%F %T.%6N")
64
+
65
+ printf "%s;%s;%s;%s;%s;%s;%s\n" "$1" "$2" "$3" "$data" "$response" "$start_" "$end_" >>$LOG_FILE_NAME
66
+ sleep "$(create_random_delay 10)"
67
+
68
+ done
69
+ }
70
+
71
+ echo "start: $(date)"
72
+
73
+ active_students=250 # the number of students using the system at the same time
74
+
75
+ i=1
76
+ while [[ "$i" -le "$active_students" ]]; do
77
+ simulate_student "student$i" "$active_students" "$endpoint" &
78
+ sleep "$(create_random_delay 1)" # adding a random delay between students
79
+ i=$(("$i" + 1))
80
+ done
81
+
82
+ wait
83
+ echo "end: $(date)"
scripts/make_request.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+
3
+ request = requests.post(url=
4
+ 'https://cetinca-fastapi-ep.hf.space/sentiment-analysis',
5
+ json={"content": "I reject it"}).json()
6
+
7
+ print(request)
8
+
9
+ request = requests.post(url=
10
+ 'https://cetinca-fastapi-ep.hf.space/text2int',
11
+ json={"content": "seven thousand nine hundred fifty seven"}
12
+ ).json()
13
+
14
+ print(request)
scripts/make_request.sh ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #root_url="localhost:7860"
2
+ root_url="https://cetinca-fastapi-ep.hf.space"
3
+
4
+ ep="/"
5
+ url=$root_url$ep
6
+ data=''
7
+
8
+ response=$(curl --silent -X GET "$url" -H 'Content-Type: application/json')
9
+
10
+ echo "URL: $url"
11
+ echo "Data: $data"
12
+ echo "Response: $response"
13
+ echo
14
+
15
+ sleep 0.1
16
+
17
+ ep="/hello"
18
+ url=$root_url$ep
19
+ data='{"content":"Rori"}'
20
+
21
+ response=$(curl --silent -X POST "$url" -H 'Content-Type: application/json' -d "$data")
22
+
23
+ echo "URL: $url"
24
+ echo "Data: $data"
25
+ echo "Response: $response"
26
+ echo
27
+
28
+ sleep 0.5
29
+
30
+ ep="/sentiment-analysis"
31
+ url=$root_url$ep
32
+ data='{"content":"I am happy with it!"}'
33
+
34
+ response=$(curl --silent -X POST "$url" -H 'Content-Type: application/json' -d "$data")
35
+
36
+ echo "URL: $url"
37
+ echo "Data: $data"
38
+ echo "Response: $response"
39
+ echo
40
+
41
+ sleep 0.5
42
+
43
+ ep="/text2int"
44
+ url=$root_url$ep
45
+ data='{"content":"one hundred forty two"}'
46
+
47
+ response=$(curl --silent -X POST "$url" -H 'Content-Type: application/json' -d "$data")
48
+
49
+ echo "URL: $url"
50
+ echo "Data: $data"
51
+ echo "Response: $response"
52
+ echo
scripts/plot_calls.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+ from datetime import datetime
3
+
4
+ import matplotlib.pyplot as plt
5
+ import pandas as pd
6
+
7
+ pd.set_option('display.max_columns', None)
8
+ pd.set_option('display.max_rows', None)
9
+
10
+ log_files = [
11
+ 'call_history_sentiment_1_bash.csv',
12
+ 'call_history_text2int_1_bash.csv',
13
+ ]
14
+
15
+ for log_file in log_files:
16
+ path_ = f"./data/{log_file}"
17
+ df = pd.read_csv(filepath_or_buffer=path_, sep=";")
18
+ df["finished_ts"] = df["finished"].apply(
19
+ lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f").timestamp())
20
+ df["started_ts"] = df["started"].apply(
21
+ lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f").timestamp())
22
+ df["elapsed"] = df["finished_ts"] - df["started_ts"]
23
+
24
+ df["success"] = df["outputs"].apply(lambda x: 0 if "Time-out" in x else 1)
25
+
26
+ student_numbers = sorted(df['active_students'].unique())
27
+
28
+ bins_dict = dict() # bins size for each group
29
+ min_finished_dict = dict() # zero time for each group
30
+
31
+ for student_number in student_numbers:
32
+ # for each student group calculates bins size and zero time
33
+ min_finished = df["finished_ts"][df["active_students"] == student_number].min()
34
+ max_finished = df["finished_ts"][df["active_students"] == student_number].max()
35
+ bins = math.ceil(max_finished - min_finished)
36
+ bins_dict.update({student_number: bins})
37
+ min_finished_dict.update({student_number: min_finished})
38
+ print(f"student number: {student_number}")
39
+ print(f"min finished: {min_finished}")
40
+ print(f"max finished: {max_finished}")
41
+ print(f"bins finished seconds: {bins}, minutes: {bins / 60}")
42
+
43
+ df["time_line"] = None
44
+ for student_number in student_numbers:
45
+ # calculates time-line for each student group
46
+ df["time_line"] = df.apply(
47
+ lambda x: x["finished_ts"] - min_finished_dict[student_number]
48
+ if x["active_students"] == student_number
49
+ else x["time_line"],
50
+ axis=1
51
+ )
52
+
53
+ # creates a '.csv' from the dataframe
54
+ df.to_csv(f"./data/processed_{log_file}", index=False, sep=";")
55
+
56
+ result = df.groupby(['active_students', 'success']) \
57
+ .agg({
58
+ 'elapsed': ['mean', 'median', 'min', 'max'],
59
+ 'success': ['count'],
60
+ })
61
+
62
+ print(f"Results for {log_file}")
63
+ print(result, "\n")
64
+
65
+ title = None
66
+ if "sentiment" in log_file.lower():
67
+ title = "API result for 'sentiment-analysis' endpoint"
68
+ elif "text2int" in log_file.lower():
69
+ title = "API result for 'text2int' endpoint"
70
+
71
+ for student_number in student_numbers:
72
+ # Prints percentage of the successful and failed calls
73
+ try:
74
+ failed_calls = result.loc[(student_number, 0), 'success'][0]
75
+ except:
76
+ failed_calls = 0
77
+ successful_calls = result.loc[(student_number, 1), 'success'][0]
78
+ percentage = (successful_calls / (failed_calls + successful_calls)) * 100
79
+ print(f"Percentage of successful API calls for {student_number} students: {percentage.__round__(2)}")
80
+
81
+ rows = len(student_numbers)
82
+
83
+ fig, axs = plt.subplots(rows, 2) # (rows, columns)
84
+
85
+ for index, student_number in enumerate(student_numbers):
86
+ # creates a boxplot for each test group
87
+ data = df[df["active_students"] == student_number]
88
+ axs[index][0].boxplot(x=data["elapsed"]) # axs[row][column]
89
+ # axs[index][0].set_title(f'Boxplot for {student_number} students')
90
+ axs[index][0].set_xlabel(f'student number {student_number}')
91
+ axs[index][0].set_ylabel('Elapsed time (s)')
92
+
93
+ # creates a histogram for each test group
94
+ axs[index][1].hist(x=data["elapsed"], bins=25) # axs[row][column]
95
+ # axs[index][1].set_title(f'Histogram for {student_number} students')
96
+ axs[index][1].set_xlabel('seconds')
97
+ axs[index][1].set_ylabel('Count of API calls')
98
+
99
+ fig.suptitle(title, fontsize=16)
100
+
101
+ fig, axs = plt.subplots(rows, 1) # (rows, columns)
102
+
103
+ for index, student_number in enumerate(student_numbers):
104
+ # creates a histogram and shows API calls on a timeline for each test group
105
+ data = df[df["active_students"] == student_number]
106
+
107
+ print(data["time_line"].head(10))
108
+
109
+ axs[index].hist(x=data["time_line"], bins=bins_dict[student_number]) # axs[row][column]
110
+ # axs[index][1].set_title(f'Histogram for {student_number} students')
111
+ axs[index].set_xlabel('seconds')
112
+ axs[index].set_ylabel('Count of API calls')
113
+
114
+ fig.suptitle(title, fontsize=16)
115
+
116
+ plt.show()
static/styles.css ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ @import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300&display=swap');
2
+
3
+ body {
4
+ font-family: 'Roboto', sans-serif;
5
+ font-size: 16px;
6
+ background-color: black;
7
+ color: white
8
+ }
templates/home.html ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>Title</title>
6
+ <link rel="stylesheet" href="{{ url_for('static', path='/styles.css') }}">
7
+ </head>
8
+ <body>
9
+ <h2>Mathbot</h2>
10
+ <h3>Created with FastAPI</h3>
11
+
12
+ <h4>To make a request with python</h4>
13
+ <pre><code>
14
+ import requests
15
+
16
+ requests.post(
17
+ url='https://cetinca-fastapi-ep.hf.space/sentiment-analysis',
18
+ json={"content": "I reject it"}
19
+ ).json()
20
+
21
+ requests.post(
22
+ url='https://cetinca-fastapi-ep.hf.space/text2int',
23
+ json={"content": "forty two"}
24
+ ).json()
25
+
26
+ </code></pre>
27
+
28
+ <h4>To make a request with curl</h4>
29
+ <pre><code>
30
+ curl --silent -X POST "https://cetinca-fastapi-ep.hf.space/sentiment-analysis" -H 'Content-Type: application/json' -d '{"content":"I am happy with it!"}'
31
+
32
+ curl --silent -X POST "https://cetinca-fastapi-ep.hf.space/text2int" -H 'Content-Type: application/json' -d '{"content":"forty two"}'
33
+ </code></pre>
34
+ </body>
35
+ </html>
tests/__init__.py ADDED
File without changes
tests/test_text2int.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+
3
+ from fastapi.testclient import TestClient
4
+
5
+ from app import app
6
+
7
+ TEST_DATA_FILE = "data/test_data_text2int.csv"
8
+
9
+ client = TestClient(app)
10
+
11
+
12
class TestStringMethods(unittest.TestCase):
    """Endpoint tests for ``/text2int`` using a fresh client per test."""

    def setUp(self):
        self.client = TestClient(app)

    def _assert_text2int(self, text, expected):
        # POST *text* and check both the status code and the converted value.
        # Always goes through the per-test client created in setUp.
        response = self.client.post("/text2int", json={"content": text})
        self.assertEqual(response.status_code, 200)
        self.assertEqual(str(response.json()["message"]), expected)

    def test_1(self):
        self._assert_text2int("fourteen", "14")

    def test_2(self):
        self._assert_text2int("one thousand four hundred ninety two", "1492")

    def test_3(self):
        self._assert_text2int("Fourteen Hundred Ninety-Two", "1492")

    def test_4(self):
        self._assert_text2int("forteen", "14")

    def test_5(self):
        self._assert_text2int("seventeen-thousand and seventy two", "17072")

    def test_6(self):
        self._assert_text2int("two hundred and nine", "209")
58
+
59
+
60
+ if __name__ == '__main__':
61
+ unittest.main()