mathtext-fastapi

Runtime error

App Files Files Community

cetinca commited on Jan 10, 2023

Commit

46f5320

verified ·

0 Parent(s):

Initial commit

Browse files

Files changed (21) hide show

.gitattributes +34 -0
.gitignore +163 -0
.gitlab-ci.yml +6 -0
Dockerfile +22 -0
README.md +11 -0
app.py +47 -0
data/test_data_text2int.csv +43 -0
modules/__init__.py +0 -0
modules/sentiment.py +8 -0
modules/text2int.py +192 -0
requirements.txt +11 -0
scripts/__init__.py +0 -0
scripts/api_scaling.py +96 -0
scripts/api_scaling.sh +83 -0
scripts/make_request.py +14 -0
scripts/make_request.sh +52 -0
scripts/plot_calls.py +116 -0
static/styles.css +8 -0
templates/home.html +35 -0
tests/__init__.py +0 -0
tests/test_text2int.py +61 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1,34 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,163 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+.idea/
+*history_sentiment*
+*history_text2int*

.gitlab-ci.yml ADDED Viewed

	@@ -0,0 +1,6 @@

+run_tests:
+  image: python:3.10-slim-buster
+  before_script:
+    - echo "Image build has started!"
+  script:
+    - pip install -r requirements.txt && pytest --verbose

Dockerfile ADDED Viewed

	@@ -0,0 +1,22 @@

+# https://huggingface.co/docs/hub/spaces-sdks-docker-first-demo
+FROM python:3.9
+WORKDIR /code
+COPY ./requirements.txt /code/requirements.txt
+RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+WORKDIR $HOME/app
+COPY --chown=user . $HOME/app
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

README.md ADDED Viewed

	@@ -0,0 +1,11 @@

+---
+title: Mathtext Fastapi
+emoji: 🐨
+colorFrom: blue
+colorTo: red
+sdk: docker
+pinned: false
+license: agpl-3.0
+---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,47 @@

+"""FastAPI endpoint
+To run locally use 'uvicorn app:app --host localhost --port 7860'
+"""
+from fastapi import FastAPI, Request
+from fastapi.responses import JSONResponse
+from fastapi.staticfiles import StaticFiles
+from fastapi.templating import Jinja2Templates
+from pydantic import BaseModel
+from modules.sentiment import sentiment
+from modules.text2int import text2int
+app = FastAPI()
+app.mount("/static", StaticFiles(directory="static"), name="static")
+templates = Jinja2Templates(directory="templates")
+class Text(BaseModel):
+    content: str = ""
+@app.get("/")
+def home(request: Request):
+    return templates.TemplateResponse("home.html", {"request": request})
@app.post("/hello")
def hello(content: Text = None):
    """Return a greeting for the name sent in the request body.

    The request body is optional. When it is missing, ``content`` is None,
    so the greeting falls back to an empty name instead of failing on the
    attribute access.
    """
    name = content.content if content is not None else ""
    return JSONResponse(content={"message": f"Hello {name}!"})
@app.post("/sentiment-analysis")
def sentiment_analysis_ep(content: Text = None):
    """Run sentiment analysis on the text sent in the request body.

    The request body is optional. When it is missing, ``content`` is None,
    so an empty string is analysed instead of failing on the attribute
    access. The result of ``sentiment`` is returned under the "message" key.
    """
    text = content.content if content is not None else ""
    ml_response = sentiment(text)
    return JSONResponse(content={"message": ml_response})
@app.post("/text2int")
def text2int_ep(content: Text = None):
    """Convert the number words sent in the request body into an int.

    The request body is optional. When it is missing, ``content`` is None,
    so an empty string is converted instead of failing on the attribute
    access. The result of ``text2int`` is returned under the "message" key.
    """
    text = content.content if content is not None else ""
    ml_response = text2int(text)
    return JSONResponse(content={"message": ml_response})

data/test_data_text2int.csv ADDED Viewed

	@@ -0,0 +1,43 @@

+input,output
+fourteen,14
+one thousand four hundred ninety two,1492
+Fourteen Hundred Ninety-Two,1492
+forteen,14
+seventeen-thousand and seventy two,17072
+two hundred and nine,209
+# Following are the ones that the current text2int.py fails on,-
+# ninety nine hundred and seventy seven,9977
+# seven thousands,7000
+# 2 hundreds,200
+# 99 thousands and one,99001
+# "forty-five thousand, seven hundred and nine",45709
+# eighty eight hundred eighty,8880
+# a hundred hundred,10000
+# a hundred thousand,100000
+# a hundred million,100000000
+# nineteen ninety nine,1999
+# forteen twenty seven,1427
+# one nine eight five,1985
+# nineteen eighty-five,1985
+# six,6
+# eighty,80
+# eight three seven five three zero nine,8375309
+# oh one,1
+# six oh 1,601
+# sex,6
+# eight oh,80
+# ate,8
+# double eight,88
+# eight three seven five three O nine,8375309
+# eight three seven five three oh nine,8375309
+# eight three seven five three 0 nine,8375309
+# eight three seven five three oh ni-ee-ine,8375309
+# two thousand ten,2010
+# two thousand and ten,2010
+# twelve million,12000000
+# 8 billion,8000000000
+# twenty ten,2010
+# thirty-two hundred,3200
+# nine,9
+# forty two,42
+# 1 2 three,123

modules/__init__.py ADDED Viewed

File without changes

modules/sentiment.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from transformers import pipeline
+sentiment_obj = pipeline(task="sentiment-analysis", model="distilbert-base-uncased-finetuned-sst-2-english")
+def sentiment(text):
+    # Returns sentiment value
+    return sentiment_obj(text)

modules/text2int.py ADDED Viewed

	@@ -0,0 +1,192 @@

+import spacy  # noqa
+# import os
+# os.environ['KMP_DUPLICATE_LIB_OK']='True'
+# import spacy
+# Change this according to what words should be corrected to
+SPELL_CORRECT_MIN_CHAR_DIFF = 2
+TOKENS2INT_ERROR_INT = 32202
+ONES = [
+    "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
+    "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
+    "sixteen", "seventeen", "eighteen", "nineteen",
+]
+CHAR_MAPPING = {
+    "-": " ",
+    "_": " ",
+    "and": " ",
+}
+# CHAR_MAPPING.update((str(i), word) for i, word in enumerate([" " + s + " " for s in ONES]))
+TOKEN_MAPPING = {
+    "and": " ",
+    "oh": "0",
+}
def find_char_diff(a, b):
    """Return the character-count difference between two strings.

    Both strings are treated as bags of characters: for every character the
    absolute difference between its number of occurrences in ``a`` and in
    ``b`` is added up. Character order is ignored, so this is not an edit
    distance. The measure is symmetric: characters that occur only in ``b``
    count just like characters that occur only in ``a``.

    Args:
        a: first string.
        b: second string.

    Returns:
        The total number of surplus characters on either side (0 when both
        strings contain exactly the same characters).
    """
    # Local import so the block does not depend on the module header.
    from collections import Counter

    counts_a = Counter(a)
    counts_b = Counter(b)
    # Counter subtraction keeps only positive counts, so each direction
    # yields the surplus of one string over the other.
    surplus_a = counts_a - counts_b
    surplus_b = counts_b - counts_a
    return sum(surplus_a.values()) + sum(surplus_b.values())
+def tokenize(text):
+    text = text.lower()
+    # print(text)
+    text = replace_tokens(''.join(i for i in replace_chars(text)).split())
+    # print(text)
+    text = [i for i in text if i != ' ']
+    # print(text)
+    output = []
+    for word in text:
+        # print(word)
+        output.append(convert_word_to_int(word))
+    output = [i for i in output if i != ' ']
+    # print(output)
+    return output
def detokenize(tokens):
    """Join ``tokens`` into one space-separated string.

    Every token is passed through ``str`` first, so lists of ints (such as
    the output of ``tokenize``) can be joined as well as lists of strings.
    """
    return ' '.join(str(token) for token in tokens)
+def replace_tokens(tokens, token_mapping=TOKEN_MAPPING):
+    return [token_mapping.get(tok, tok) for tok in tokens]
+def replace_chars(text, char_mapping=CHAR_MAPPING):
+    return [char_mapping.get(c, c) for c in text]
def convert_word_to_int(in_word, numwords={}):
    """Convert a single word (or digit string) into an int.

    The word is resolved in three steps:

    1. exact lookup among the known number words (ONES, tens, scales);
    2. parsing it as an integer literal;
    3. spell correction: the known number word with the smallest
       ``find_char_diff`` is used when that difference is at most
       ``SPELL_CORRECT_MIN_CHAR_DIFF``.

    Args:
        in_word: the word to convert.
        numwords: word -> value table. The mutable default is intentional:
            it is filled on the first call and reused afterwards as a cache.

    Returns:
        The int value of the word, or None when it cannot be interpreted.
    """
    tens = ["", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
    scales = ["hundred", "thousand", "million", "billion", "trillion"]
    if not numwords:
        for idx, word in enumerate(ONES):
            numwords[word] = idx
        for idx, word in enumerate(tens):
            # The "" entries only pad the list so that index * 10 gives the
            # value; they are not number words and must not be registered.
            if word:
                numwords[word] = idx * 10
        for idx, word in enumerate(scales):
            # "hundred" is 10**2, the remaining scales are 10**(3 * idx).
            numwords[word] = 10 ** (idx * 3 or 2)

    if in_word in numwords:
        return numwords[in_word]

    try:
        return int(in_word)
    except ValueError:
        pass

    if not in_word:
        # An empty string has nothing to spell-correct.
        return None

    # Spell correction using find_char_diff; the "" placeholders are left out
    # so they can never be selected as the closest word.
    candidates = [word for word in ONES + tens + scales if word]
    char_diffs = [find_char_diff(in_word, word) for word in candidates]
    min_char_diff = min(char_diffs)
    if min_char_diff <= SPELL_CORRECT_MIN_CHAR_DIFF:
        # Return the value of the matched word, not its position in the list:
        # the position equals the value only for the entries of ONES.
        best_match = candidates[char_diffs.index(min_char_diff)]
        return numwords.get(best_match)
    return None
def tokens2int(tokens):
    """Combine a list of integer tokens into a single int.

    Each token first gets a magnitude class: 1 for values up to 9, 2 for
    values up to 90 and 3 for anything larger. The classes then select one
    of three strategies:

    * at most three tokens: the tokens are folded left to right, adding,
      multiplying or concatenating depending on the classes;
    * more than three tokens, all of class 1: the tokens are concatenated
      as decimal digits;
    * otherwise: the tokens are consumed in consecutive pairs and the value
      of every pair is added to the total.

    Args:
        tokens: list of ints, e.g. ``[2, 100, 9]``.

    Returns:
        The combined int. An empty list gives 0.
    """
    types = []
    for value in tokens:
        if value <= 9:
            types.append(1)
        elif value <= 90:
            types.append(2)
        else:
            types.append(3)

    if len(tokens) <= 3:
        digits = ''.join(str(value) for value in tokens)
        type_pattern = ''.join(str(kind) for kind in types)
        current = 0
        for i, number in enumerate(tokens):
            if i != 0 and types[i] < types[i - 1] and current != tokens[i - 1] and types[i - 1] != 3:
                # Lower-class token after a class 1/2 token that is not
                # already the running total: add both tokens.
                current += number + tokens[i - 1]
            elif current <= number and current != 0:
                # Token is at least as large as the running total: scale it
                # (e.g. 2 followed by 100 gives 200).
                current *= number
            elif 3 not in types and 1 not in types:
                # Only class-2 tokens: read them as concatenated digits.
                current = int(digits)
                break
            elif '111' in type_pattern and 2 not in types and 3 not in types:
                # Three single digits: read them as concatenated digits.
                current = int(digits)
                break
            else:
                current += number
    elif 3 not in types and 2 not in types:
        # More than three single digits: concatenate them.
        current = int(''.join(str(value) for value in tokens))
    else:
        current = 0
        for i, token in enumerate(tokens):
            if i % 2 == 1:
                # Second token of a pair: combine it with the first one.
                if types[i - 1] == types[i]:
                    # NOTE(review): the later token's digits are placed
                    # first here; kept as in the original, confirm intent.
                    current += int(str(token) + str(tokens[i - 1]))
                elif types[i - 1] > types[i]:
                    current += tokens[i - 1] + token
                else:
                    current += tokens[i - 1] * token
            elif i == len(tokens) - 1:
                # Unpaired trailing token.
                current += token
    return current
+def text2int(text):
+    # Wraps all of the functions up into one
+    return tokens2int(tokenize(text))

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+fastapi==0.74.*
+requests==2.27.*
+sentencepiece==0.1.*
+torch==1.11.*
+transformers==4.*
+uvicorn[standard]==0.17.*
+pydantic
+spacy
+pandas
+matplotlib
+pytest

scripts/__init__.py ADDED Viewed

File without changes

scripts/api_scaling.py ADDED Viewed

	@@ -0,0 +1,96 @@

+"""https://zetcode.com/python/concurrent-http-requests/"""
+import asyncio
+import random
+import time
+import pandas as pd
+import httpx
+from os.path import exists
+NUMBER_OF_CALLS = 1
+headers = {"Content-Type": "application/json; charset=utf-8"}
+# base_url = "https://tangibleai-mathtext.hf.space/run/{endpoint}"
+base_url = "http://localhost:7860/run/{endpoint}"
+data_list_1 = {
+    "endpoint": "text2int",
+    "test_data": [
+        "one hundred forty five",
+        "twenty thousand nine hundred fifty",
+        "one hundred forty five",
+        "nine hundred eighty three",
+        "five million",
+    ]
+}
+data_list_2 = {
+    "endpoint": "text2int-preprocessed",
+    "test_data": [
+        "one hundred forty five",
+        "twenty thousand nine hundred fifty",
+        "one hundred forty five",
+        "nine hundred eighty three",
+        "five million",
+    ]
+}
+data_list_3 = {
+    "endpoint": "sentiment-analysis",
+    "test_data": [
+        "Totally agree",
+        "I like it",
+        "No more",
+        "I am not sure",
+        "Never",
+    ]
+}
+# async call to endpoint
async def call_api(url, data, call_number, number_of_calls):
    """POST one test string to ``url`` and return a timing record.

    Args:
        url: full endpoint URL; its last path segment is stored as the
            endpoint name.
        data: the test string, sent as ``{"data": [data]}``.
        call_number: position of this call within its batch.
        number_of_calls: size of the batch this call belongs to.

    Returns:
        A dict with the endpoint, the test data, the HTTP status code, the
        "data" field of the JSON response (None when the body is not a JSON
        object), the call counters and the start/end/delay timings rounded
        to four decimals.
    """
    payload = {"data": [data]}
    async with httpx.AsyncClient() as client:
        start = time.perf_counter()  # Used perf_counter for more precise result.
        response = await client.post(url=url, headers=headers, json=payload, timeout=30)
        end = time.perf_counter()

    # An error page is not necessarily JSON; record None for it instead of
    # letting the decode error abort every other call gathered with this one.
    try:
        body = response.json()
    except ValueError:
        body = None
    response_data = body.get("data") if isinstance(body, dict) else None

    return {
        "endpoint": url.split("/")[-1],
        "test data": data,
        "status code": response.status_code,
        "response": response_data,
        "call number": call_number,
        "number of calls": number_of_calls,
        "start": round(start, 4),
        "end": round(end, 4),
        "delay": round(end - start, 4),
    }
+data_lists = [data_list_1, data_list_2, data_list_3]
+results = []
+async def main(number_of_calls):
+    for data_list in data_lists:
+        calls = []
+        for call_number in range(1, number_of_calls + 1):
+            url = base_url.format(endpoint=data_list["endpoint"])
+            data = random.choice(data_list["test_data"])
+            calls.append(call_api(url, data, call_number, number_of_calls))
+        r = await asyncio.gather(*calls)
+        results.extend(r)
+start = time.perf_counter()
+asyncio.run(main(NUMBER_OF_CALLS))
+end = time.perf_counter()
+print(end-start)
+df = pd.DataFrame(results)
+if exists("call_history.csv"):
+    df.to_csv(path_or_buf="call_history.csv", mode="a", header=False, index=False)
+else:
+    df.to_csv(path_or_buf="call_history.csv", mode="w", header=True, index=False)

scripts/api_scaling.sh ADDED Viewed

	@@ -0,0 +1,83 @@

+#! /bin/env bash
+LOG_FILE_NAME="call_history_bash.csv"
+if [[ ! -f "$LOG_FILE_NAME" ]]; then
+  # Creation of column names if the file does not exist
+  echo "student_id;active_students;endpoint;inputs;outputs;started;finished" >$LOG_FILE_NAME
+fi
+data_list_1() {
+  responses=(
+    "one hundred forty five"
+    "twenty thousand nine hundred fifty"
+    "one hundred forty five"
+    "nine hundred eighty three"
+    "five million"
+  )
+  echo "${responses[$1]}"
+}
+data_list_2() {
+  responses=(
+    "Totally agree"
+    "I like it"
+    "No more"
+    "I am not sure"
+    "Never"
+  )
+  echo "${responses[$1]}"
+}
+# endpoints: "text2int" "sentiment-analysis"
+# selected endpoint to test
+endpoint="sentiment-analysis"
+create_random_delay() {
+  # creates a random delay for given arguments
+  echo "scale=8; $RANDOM/32768*$1" | bc
+}
+simulate_student() {
+  # Student simulator waits randomly between 0-10s after an interaction.
+  # Based on 100 interactions per student
+  for i in {1..100}; do
+    random_value=$((RANDOM % 5))
+    text=$(data_list_2 $random_value)
+    data='{"data": ["'$text'"]}'
+    start_=$(date +"%F %T.%6N")
+    url="https://tangibleai-mathtext.hf.space/run/$3"
+    response=$(curl --silent --connect-timeout 30 --max-time 30 -X POST "$url" -H 'Content-Type: application/json' -d "$data")
+    if [[ "$response" == *"Time-out"* ]]; then
+      echo "$response" >>bad_response.txt
+      response="504 Gateway Time-out"
+    elif [[ -z "$response" ]]; then
+      echo "No response" >>bad_response.txt
+      response="504 Gateway Time-out"
+    fi
+    end_=$(date +"%F %T.%6N")
+    printf "%s;%s;%s;%s;%s;%s;%s\n" "$1" "$2" "$3" "$data" "$response" "$start_" "$end_" >>$LOG_FILE_NAME
+    sleep "$(create_random_delay 10)"
+  done
+}
+echo "start: $(date)"
+active_students=250 # the number of students using the system at the same time
+i=1
+while [[ "$i" -le "$active_students" ]]; do
+  simulate_student "student$i" "$active_students" "$endpoint" &
+  sleep "$(create_random_delay 1)" # adding a random delay between students
+  i=$(("$i" + 1))
+done
+wait
+echo "end: $(date)"

scripts/make_request.py ADDED Viewed

	@@ -0,0 +1,14 @@

+import requests
+request = requests.post(url=
+                        'https://cetinca-fastapi-ep.hf.space/sentiment-analysis',
+                        json={"content": "I reject it"}).json()
+print(request)
+request = requests.post(url=
+                        'https://cetinca-fastapi-ep.hf.space/text2int',
+                        json={"content": "seven thousand nine hundred fifty seven"}
+                        ).json()
+print(request)

scripts/make_request.sh ADDED Viewed

	@@ -0,0 +1,52 @@

+#root_url="localhost:7860"
+root_url="https://cetinca-fastapi-ep.hf.space"
+ep="/"
+url=$root_url$ep
+data=''
+response=$(curl --silent -X GET "$url" -H 'Content-Type: application/json')
+echo "URL: $url"
+echo "Data: $data"
+echo "Response: $response"
+echo
+sleep 0.1
+ep="/hello"
+url=$root_url$ep
+data='{"content":"Rori"}'
+response=$(curl --silent -X POST "$url" -H 'Content-Type: application/json' -d "$data")
+echo "URL: $url"
+echo "Data: $data"
+echo "Response: $response"
+echo
+sleep 0.5
+ep="/sentiment-analysis"
+url=$root_url$ep
+data='{"content":"I am happy with it!"}'
+response=$(curl --silent -X POST "$url" -H 'Content-Type: application/json' -d "$data")
+echo "URL: $url"
+echo "Data: $data"
+echo "Response: $response"
+echo
+sleep 0.5
+ep="/text2int"
+url=$root_url$ep
+data='{"content":"one hundred forty two"}'
+response=$(curl --silent -X POST "$url" -H 'Content-Type: application/json' -d "$data")
+echo "URL: $url"
+echo "Data: $data"
+echo "Response: $response"
+echo

scripts/plot_calls.py ADDED Viewed

	@@ -0,0 +1,116 @@

+import math
+from datetime import datetime
+import matplotlib.pyplot as plt
+import pandas as pd
+pd.set_option('display.max_columns', None)
+pd.set_option('display.max_rows', None)
+log_files = [
+    'call_history_sentiment_1_bash.csv',
+    'call_history_text2int_1_bash.csv',
+]
+for log_file in log_files:
+    path_ = f"./data/{log_file}"
+    df = pd.read_csv(filepath_or_buffer=path_, sep=";")
+    df["finished_ts"] = df["finished"].apply(
+        lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f").timestamp())
+    df["started_ts"] = df["started"].apply(
+        lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S.%f").timestamp())
+    df["elapsed"] = df["finished_ts"] - df["started_ts"]
+    df["success"] = df["outputs"].apply(lambda x: 0 if "Time-out" in x else 1)
+    student_numbers = sorted(df['active_students'].unique())
+    bins_dict = dict()  # bins size for each group
+    min_finished_dict = dict()  # zero time for each group
+    for student_number in student_numbers:
+        # for each student group calculates bins size and zero time
+        min_finished = df["finished_ts"][df["active_students"] == student_number].min()
+        max_finished = df["finished_ts"][df["active_students"] == student_number].max()
+        bins = math.ceil(max_finished - min_finished)
+        bins_dict.update({student_number: bins})
+        min_finished_dict.update({student_number: min_finished})
+        print(f"student number: {student_number}")
+        print(f"min finished: {min_finished}")
+        print(f"max finished: {max_finished}")
+        print(f"bins finished seconds: {bins}, minutes: {bins / 60}")
+    df["time_line"] = None
+    for student_number in student_numbers:
+        # calculates time-line for each student group
+        df["time_line"] = df.apply(
+            lambda x: x["finished_ts"] - min_finished_dict[student_number]
+            if x["active_students"] == student_number
+            else x["time_line"],
+            axis=1
+        )
+    # creates a '.csv' from the dataframe
+    df.to_csv(f"./data/processed_{log_file}", index=False, sep=";")
+    result = df.groupby(['active_students', 'success']) \
+        .agg({
+        'elapsed': ['mean', 'median', 'min', 'max'],
+        'success': ['count'],
+    })
+    print(f"Results for {log_file}")
+    print(result, "\n")
+    title = None
+    if "sentiment" in log_file.lower():
+        title = "API result for 'sentiment-analysis' endpoint"
+    elif "text2int" in log_file.lower():
+        title = "API result for 'text2int' endpoint"
+    for student_number in student_numbers:
+        # Prints percentage of the successful and failed calls
+        try:
+            failed_calls = result.loc[(student_number, 0), 'success'][0]
+        except:
+            failed_calls = 0
+        successful_calls = result.loc[(student_number, 1), 'success'][0]
+        percentage = (successful_calls / (failed_calls + successful_calls)) * 100
+        print(f"Percentage of successful API calls for {student_number} students: {percentage.__round__(2)}")
+    rows = len(student_numbers)
+    fig, axs = plt.subplots(rows, 2)  # (rows, columns)
+    for index, student_number in enumerate(student_numbers):
+        # creates a boxplot for each test group
+        data = df[df["active_students"] == student_number]
+        axs[index][0].boxplot(x=data["elapsed"])  # axs[row][column]
+        # axs[index][0].set_title(f'Boxplot for {student_number} students')
+        axs[index][0].set_xlabel(f'student number {student_number}')
+        axs[index][0].set_ylabel('Elapsed time (s)')
+        # creates a histogram for each test group
+        axs[index][1].hist(x=data["elapsed"], bins=25)  # axs[row][column]
+        # axs[index][1].set_title(f'Histogram for {student_number} students')
+        axs[index][1].set_xlabel('seconds')
+        axs[index][1].set_ylabel('Count of API calls')
+    fig.suptitle(title, fontsize=16)
+    fig, axs = plt.subplots(rows, 1)  # (rows, columns)
+    for index, student_number in enumerate(student_numbers):
+        # creates a histogram and shows API calls on a timeline for each test group
+        data = df[df["active_students"] == student_number]
+        print(data["time_line"].head(10))
+        axs[index].hist(x=data["time_line"], bins=bins_dict[student_number])  # axs[row][column]
+        # axs[index][1].set_title(f'Histogram for {student_number} students')
+        axs[index].set_xlabel('seconds')
+        axs[index].set_ylabel('Count of API calls')
+    fig.suptitle(title, fontsize=16)
+plt.show()

static/styles.css ADDED Viewed

	@@ -0,0 +1,8 @@

+@import url('https://fonts.googleapis.com/css2?family=Roboto:wght@300&display=swap');
+body {
+font-family: 'Roboto', sans-serif;
+font-size: 16px;
+background-color: black;
+color: white
+}

templates/home.html ADDED Viewed

	@@ -0,0 +1,35 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <title>Title</title>
+    <link rel="stylesheet" href="{{ url_for('static', path='/styles.css') }}">
+</head>
+<body>
+<h2>Mathbot</h2>
+<h3>Created with FastAPI</h3>
+<h4>To make a request with python</h4>
+<pre><code>
+import requests
+requests.post(
+    url='https://cetinca-fastapi-ep.hf.space/sentiment-analysis',
+    json={"content": "I reject it"}
+    ).json()
+requests.post(
+    url='https://cetinca-fastapi-ep.hf.space/text2int',
+    json={"content": "forty two"}
+    ).json()
+</code></pre>
+<h4>To make a request with curl</h4>
+<pre><code>
+curl --silent -X POST "https://cetinca-fastapi-ep.hf.space/sentiment-analysis" -H 'Content-Type: application/json' -d '{"content":"I am happy with it!"}'
+curl --silent -X POST "https://cetinca-fastapi-ep.hf.space/text2int" -H 'Content-Type: application/json' -d '{"content":"forty two"}'
+</code></pre>
+</body>
+</html>

tests/__init__.py ADDED Viewed

File without changes

tests/test_text2int.py ADDED Viewed

	@@ -0,0 +1,61 @@

+import unittest
+from fastapi.testclient import TestClient
+from app import app
+TEST_DATA_FILE = "data/test_data_text2int.csv"
+client = TestClient(app)
+class TestStringMethods(unittest.TestCase):
+    def setUp(self):
+        self.client = TestClient(app)
+    def test_1(self):
+        response = self.client.post("/text2int",
+                                    json={"content": "fourteen"}
+                                    )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(str(response.json()["message"]), "14")
+    def test_2(self):
+        response = self.client.post("/text2int",
+                                    json={"content": "one thousand four hundred ninety two"}
+                                    )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(str(response.json()["message"]), "1492")
+    def test_3(self):
+        response = self.client.post("/text2int",
+                                    json={"content": "Fourteen Hundred Ninety-Two"}
+                                    )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(str(response.json()["message"]), "1492")
+    def test_4(self):
+        response = client.post("/text2int",
+                               json={"content": "forteen"}
+                               )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(str(response.json()["message"]), "14")
+    def test_5(self):
+        response = client.post("/text2int",
+                               json={"content": "seventeen-thousand and seventy two"}
+                               )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(str(response.json()["message"]), "17072")
+    def test_6(self):
+        response = client.post("/text2int",
+                               json={"content": "two hundred and nine"}
+                               )
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(str(response.json()["message"]), "209")
+if __name__ == '__main__':
+    unittest.main()