jeremiebasso committed
Commit 8fe5582
1 Parent(s): fee204b

initial commit

Files changed (8)
  1. .gitignore +37 -0
  2. app.py +46 -0
  3. configuration.py +8 -0
  4. onnx_model.py +82 -0
  5. postprocess.py +76 -0
  6. requirements.txt +7 -0
  7. theme.py +36 -0
  8. utils.py +52 -0
.gitignore ADDED
@@ -0,0 +1,37 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ .mypy_cache
+ .vscode
+ .pylintrc
+ .python-version
+ .prettierignore
+ */.pytest_cache
+ .env
+ model/*
app.py ADDED
@@ -0,0 +1,46 @@
+ import gradio as gr
+ from transformers import AutoTokenizer
+ from transformers.utils import logging as hf_logging
+
+ from configuration import Config
+ from onnx_model import ONNXModel
+ from postprocess import get_sentiment
+ from theme import theme
+ from utils import download_model
+
+ hf_logging.disable_progress_bar()
+ config = Config()
+
+ model_path = download_model("ml-sentiment-adapter", "production")
+ model = ONNXModel.from_dir(model_path)
+ tokenizer = AutoTokenizer.from_pretrained(model.model_info.base_model)
+
+
+ def predict(sentence: str):
+     encoding = tokenizer([sentence], truncation=True, return_tensors="np")
+     logits = model(**encoding)
+     score, sentiment = get_sentiment(logits, config.negative_threshold, config.positive_threshold, config.zero)
+     return {sentiment: score}
+
+
+ demo = gr.Interface(
+     fn=predict,
+     inputs=gr.Textbox(label="Customer Review", value="Lettria truly handled all the overhead of an NLP project!"),
+     outputs=gr.Label(label="Sentiment Level"),
+     title="Lettria's Customer Sentiment Analysis",
+     description="Introducing our Sentiment Analysis API powered by deep learning! It provides an easy-to-use solution for analyzing and understanding the sentiment expressed in text. With this API, you can gain valuable insights from customer feedback, social media posts, and reviews by accurately classifying text into positive, negative, or neutral sentiment categories. Seamlessly integrate it into your applications to make data-driven decisions, monitor brand reputation, and enhance customer satisfaction in real time. Uncover the true sentiment behind text and unlock the power of sentiment analysis today!",
+     examples=[
+         "I absolutely loved the movie! The storyline was captivating, and the acting was superb.",
+         "I'm extremely disappointed with the quality of the product. It broke within a week of use.",
+         "Today has been an average day. Nothing particularly good or bad happened.",
+         "This book is a masterpiece. The author's writing style is brilliant, and the characters are well-developed.",
+         "I'm feeling neutral about the new restaurant. The ambiance was nice, but the food was mediocre.",
+     ],
+     theme=theme,
+     allow_flagging="never",
+ )
+
+ demo.launch()
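
app.py wires the downloaded ONNX model, the tokenizer, and the calibrated postprocessing into a single predict function that the Gradio interface exposes. For a quick check without the browser UI, predict can be exercised directly; a minimal sketch, assuming demo.launch() is temporarily commented out so that importing app does not start the server (the printed output is illustrative, not from a real run):

from app import predict

print(predict("The support team was quick and helpful."))
# Illustrative output: a single-entry dict such as {"POSITIVE": 0.41}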
configuration.py ADDED
@@ -0,0 +1,8 @@
+ from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+ class Config(BaseSettings):
+     model_config = SettingsConfigDict(env_prefix="lt_")
+     negative_threshold: float = -0.65
+     positive_threshold: float = 0.37
+     zero: float = 0
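
Because Config subclasses pydantic-settings' BaseSettings with env_prefix="lt_", each threshold can be overridden through the environment without touching the code. A minimal sketch (the override value is an arbitrary example; pydantic-settings matches the prefixed names case-insensitively by default):

import os

os.environ["LT_POSITIVE_THRESHOLD"] = "0.5"  # arbitrary example value

from configuration import Config

print(Config().positive_threshold)  # 0.5, read from the environment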
onnx_model.py ADDED
@@ -0,0 +1,82 @@
+ from __future__ import annotations
+
+ import json
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any
+
+ import numpy as np
+ import onnxruntime as ort
+ from loguru import logger
+ from onnxruntime.transformers.io_binding_helper import TypeHelper
+
+
+ @dataclass
+ class ModelInfo:
+     base_model: str
+
+     @classmethod
+     def from_dir(cls, model_dir: Path):
+         with open(model_dir / "metadata.json", "r", encoding="utf-8") as file:
+             data = json.load(file)
+         return ModelInfo(base_model=data["bert_type"])
+
+
+ class ONNXModel:
+     def __init__(self, model: ort.InferenceSession, model_info: ModelInfo) -> None:
+         self.model = model
+         self.model_info = model_info
+         self.model_path = Path(model._model_path)  # type: ignore
+         self.model_name = self.model_path.name
+
+         self.providers = model.get_providers()
+
+         if self.providers[0] in ["CUDAExecutionProvider", "TensorrtExecutionProvider"]:
+             self.device = "cuda"
+         else:
+             self.device = "cpu"
+
+         self.io_types = TypeHelper.get_io_numpy_type_map(model)
+
+         self.input_names = [el.name for el in model.get_inputs()]
+         self.output_name = model.get_outputs()[0].name
+
+     @staticmethod
+     def load_session(
+         path: str | Path,
+         provider: str = "CPUExecutionProvider",
+         session_options: ort.SessionOptions | None = None,
+         provider_options: dict[str, Any] | None = None,
+     ) -> ort.InferenceSession:
+         providers = [provider]
+         if provider == "TensorrtExecutionProvider":
+             providers.append("CUDAExecutionProvider")
+         elif provider == "CUDAExecutionProvider":
+             providers.append("CPUExecutionProvider")
+
+         if not isinstance(path, str):
+             path = Path(path) / "model.onnx"
+
+         providers_options = None
+         if provider_options is not None:
+             providers_options = [provider_options] + [{} for _ in range(len(providers) - 1)]
+
+         session = ort.InferenceSession(
+             str(path),
+             providers=providers,
+             sess_options=session_options,
+             provider_options=providers_options,
+         )
+         logger.info("Session loaded")
+         return session
+
+     @classmethod
+     def from_dir(cls, model_dir: str | Path) -> ONNXModel:
+         return ONNXModel(ONNXModel.load_session(model_dir), ModelInfo.from_dir(model_dir))
+
+     def __call__(self, **model_inputs: np.ndarray):
+         model_inputs = {
+             input_name: tensor.astype(self.io_types[input_name]) for input_name, tensor in model_inputs.items()
+         }
+
+         return self.model.run([self.output_name], model_inputs)[0]
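
ONNXModel wraps an onnxruntime InferenceSession: from_dir loads model.onnx plus the metadata.json sitting next to it, and __call__ casts incoming NumPy tensors to the dtypes the session expects before returning the first output. A minimal standalone sketch, assuming a hypothetical local directory model/my-run produced by utils.download_model:

from pathlib import Path

from transformers import AutoTokenizer

from onnx_model import ONNXModel

model = ONNXModel.from_dir(Path("model/my-run"))  # hypothetical path
tokenizer = AutoTokenizer.from_pretrained(model.model_info.base_model)

encoding = tokenizer(["Great product!"], truncation=True, return_tensors="np")
logits = model(**encoding)  # NumPy array holding the first session output
print(logits.shape)  # e.g. (1, 5) for the five ordinal sentiment classes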
postprocess.py ADDED
@@ -0,0 +1,76 @@
+ import numpy as np
+
+
+ def softmax(x: np.ndarray, axis=1) -> np.ndarray:
+     """
+     Computes softmax array along the specified axis.
+     """
+     # Subtracting the max keeps exp() from overflowing; softmax is shift-invariant.
+     e_x = np.exp(x - x.max(axis=axis, keepdims=True))
+     return e_x / e_x.sum(axis=axis, keepdims=True)
+
+
+ def calibrate_sentiment_score(
+     sentiment: float,
+     thresh_neg: float,
+     thresh_pos: float,
+     zero: float = 0,
+ ) -> float:
+     if thresh_neg != (zero - 1) / 2:
+         alpha_neg = -(3 * zero - 1 - 4 * thresh_neg) / (2 * zero - 2 - 4 * thresh_neg) / 2
+         if -1 < alpha_neg < 0:
+             raise ValueError(f"Incorrect value: {thresh_neg=} is too far from -0.5!")
+     if thresh_pos != (zero + 1) / 2:
+         alpha_pos = -(4 * thresh_pos - 1 - 3 * zero) / (2 + 2 * zero - 4 * thresh_pos) / 2
+         if 0 < alpha_pos < 1:
+             raise ValueError(f"Incorrect value: {thresh_pos=} is too far from 0.5!")
+     if sentiment < 0:
+         return (2 * zero - 2 - 4 * thresh_neg) * sentiment**2 + (3 * zero - 1 - 4 * thresh_neg) * sentiment + zero
+     elif sentiment > 0:
+         return (2 + 2 * zero - 4 * thresh_pos) * sentiment**2 + (4 * thresh_pos - 1 - 3 * zero) * sentiment + zero
+     return zero
+
+
+ def calibrate_sentiment(
+     sentiments: np.ndarray,
+     thresh_neg: float,
+     thresh_pos: float,
+     zero: float,
+ ) -> np.ndarray:
+     result = np.array(
+         [
+             calibrate_sentiment_score(sentiment, thresh_neg=thresh_neg, thresh_pos=thresh_pos, zero=zero)
+             for sentiment in sentiments
+         ]
+     )
+     return result.astype(np.float64)
+
+
+ def scale_value(value, in_min, in_max, out_min, out_max):
+     if in_min <= value <= in_max:
+         scaled_value = (value - in_min) / (in_max - in_min) * (out_max - out_min) + out_min
+         return round(scaled_value, 3)
+     raise ValueError(f"Input value must be in the range [{in_min}, {in_max}]")
+
+
+ def get_sentiment(
+     logits: np.ndarray,
+     thresh_neg: float,
+     thresh_pos: float,
+     zero: float,
+ ):
+     probabilities = softmax(logits, axis=1)
+     # Expected class index over the 5 ordinal labels, rescaled from [0, 4] to [-1, 1].
+     sentiments = np.matmul(probabilities, np.arange(5)) / 2 - 1
+     score = calibrate_sentiment(
+         sentiments=sentiments,
+         thresh_neg=thresh_neg,
+         thresh_pos=thresh_pos,
+         zero=zero,
+     )[0]
+     if score < -0.33:
+         return scale_value(score, -1, -0.33, 0, 1), "NEGATIVE"
+     elif score < 0.33:
+         return scale_value(score, -0.33, 0.33, 0, 1), "NEUTRAL"
+     else:
+         return scale_value(score, 0.33, 1, 0, 1), "POSITIVE"
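
The calibration is a piecewise quadratic: on the negative side the coefficients are chosen so that f(-1) = -1, f(-0.5) = thresh_neg, and f(0) = zero, and symmetrically on the positive side with thresh_pos, while the alpha checks reject thresholds that would place the parabola's vertex inside the interval and break monotonicity. A worked sketch with the defaults from configuration.py (the logits are made up; the expected output comes from hand-checking the math, not from a real run):

import numpy as np

from postprocess import get_sentiment

# Made-up logits for one sentence over the five ordinal classes
# (very negative ... very positive); shape (1, 5).
logits = np.array([[0.1, 0.2, 0.4, 2.5, 3.0]])

score, label = get_sentiment(logits, thresh_neg=-0.65, thresh_pos=0.37, zero=0.0)
print(label, score)  # hand-checked: POSITIVE with a score around 0.35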
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ transformers
+ pydantic
+ pydantic_settings
+ numpy
+ onnxruntime
+ loguru
+ mlflow
theme.py ADDED
@@ -0,0 +1,36 @@
+ import gradio as gr
+
+ _PRIMARY = gr.themes.colors.Color(
+     name="lt_1",
+     c50="#7A48E7",
+     c100="#7A48E7",
+     c200="#7A48E7",
+     c300="#7A48E7",
+     c400="#7A48E7",
+     c500="#7A48E7",
+     c600="#F7F0FF",
+     c700="#F7F0FF",
+     c800="#F7F0FF",
+     c900="#F7F0FF",
+     c950="#F7F0FF",
+ )
+
+ _SECONDARY = gr.themes.colors.Color(
+     name="lt_2",
+     c50="#F6F0FF",
+     c100="#F6F0FF",
+     c200="#F6F0FF",
+     c300="#F5F0FF",
+     c400="#F5F0FF",
+     c500="#D8CDF6",
+     c600="#D8CDF6",
+     c700="#BCADEC",
+     c800="#BCADEC",
+     c900="#9580D8",
+     c950="#9580D8",
+ )
+
+ theme = gr.themes.Default(
+     primary_hue=_PRIMARY,
+     secondary_hue=_SECONDARY,
+ )
utils.py ADDED
@@ -0,0 +1,52 @@
+ """Utils"""
+ from __future__ import annotations
+
+ import json
+ from pathlib import Path
+ from typing import Literal
+
+ from loguru import logger
+
+
+ def download_model(
+     model_name: str,
+     model_stage: Literal["staging", "production"],
+     model_dir: str | Path = "model",
+ ) -> Path:
+     """Download model from mlflow"""
+     import mlflow.artifacts
+     import mlflow.models
+     from mlflow.client import MlflowClient
+
+     logger.info(f"Looking for model {model_name}/{model_stage}")
+
+     if isinstance(model_dir, str):
+         model_dir = Path(model_dir)
+
+     client = MlflowClient()
+     model_versions = client.get_latest_versions(model_name, stages=[model_stage])
+     if len(model_versions) != 1:
+         raise ValueError(f"No model version for {model_name}/{model_stage}")
+
+     artifact_uri = model_versions[0].source
+     model_version = model_versions[0].version
+
+     logger.info(f"Found version {model_version} for {model_name}/{model_stage}")
+
+     model_path = model_dir / artifact_uri.split("/")[-1]  # type: ignore
+     if model_path.exists():
+         logger.info(f"Found model in {model_path}, skipping download")
+         return model_path
+
+     logger.info(f"Downloading artifacts {artifact_uri} to {model_dir}")
+     model_path = mlflow.artifacts.download_artifacts(artifact_uri, dst_path=str(model_dir))
+     logger.info(f"Successfully downloaded {model_name}")
+
+     model_info = mlflow.models.get_model_info(model_path)
+     metadata = model_info.metadata
+     metadata_path = Path(model_path) / "metadata.json"
+     logger.info(f"Saving metadata to {metadata_path}")
+     with open(metadata_path, "w", encoding="utf-8") as file:
+         json.dump(metadata, file)
+
+     return Path(model_path)
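
download_model resolves the latest registered version for the requested stage, downloads its artifacts once (later runs reuse the local copy), and writes the model's MLflow metadata next to it so that ModelInfo.from_dir can recover the base tokenizer name. MlflowClient() picks the registry up from the standard MLFLOW_TRACKING_URI environment variable, which therefore has to point at the server hosting the registered model; a sketch with a hypothetical URI:

import os

os.environ.setdefault("MLFLOW_TRACKING_URI", "https://mlflow.example.com")  # hypothetical server

from utils import download_model

model_path = download_model("ml-sentiment-adapter", "production")
print(model_path)  # local directory containing model.onnx and metadata.json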