jeremiebasso committed
Commit 8fe5582
1 Parent(s): fee204b

initial commit

Files changed (8)
  1. .gitignore +37 -0
  2. app.py +46 -0
  3. configuration.py +8 -0
  4. onnx_model.py +82 -0
  5. postprocess.py +76 -0
  6. requirements.txt +7 -0
  7. theme.py +36 -0
  8. utils.py +52 -0
.gitignore ADDED
@@ -0,0 +1,37 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ .mypy_cache
+ .vscode
+ .pylintrc
+ .python-version
+ .prettierignore
+ */.pytest_cache
+ .env
+ model/*
app.py ADDED
@@ -0,0 +1,46 @@
+ import gradio as gr
+ from transformers import AutoTokenizer
+ from transformers.utils import logging as hf_logging
+
+ from configuration import Config
+ from onnx_model import ONNXModel
+ from postprocess import get_sentiment
+ from theme import theme
+ from utils import download_model
+
+ hf_logging.disable_progress_bar()
+ config = Config()
+
+ model_path = download_model("ml-sentiment-adapter", "production")
+ model = ONNXModel.from_dir(model_path)
+ tokenizer = AutoTokenizer.from_pretrained(model.model_info.base_model)
+
+
+ def predict(sentence: str):
+     encoding = tokenizer([sentence], truncation=True, return_tensors="np")
+     logits = model(**encoding)
+     score, sentiment = get_sentiment(logits, config.negative_threshold, config.positive_threshold, config.zero)
+     return {sentiment: score}
+
+
+ demo = gr.Interface(
+     fn=predict,
+     inputs=gr.Textbox(label="Customer Review", value="Lettria truly handled all the overhead of an NLP project!"),
+     outputs=gr.Label(label="Sentiment Level"),
+     title="Lettria's Customer Sentiment Analysis",
+     description="Introducing our Sentiment Analysis API powered by deep learning! It provides an easy-to-use solution for analyzing and understanding the sentiment expressed in text. With this API, you can gain valuable insights from customer feedback, social media posts, and reviews by accurately classifying text into positive, negative, or neutral sentiment categories. Seamlessly integrate it into your applications to make data-driven decisions, monitor brand reputation, and enhance customer satisfaction in real time. Uncover the true sentiment behind text and unlock the power of sentiment analysis today!",
+     examples=[
+         "I absolutely loved the movie! The storyline was captivating, and the acting was superb.",
+         "I'm extremely disappointed with the quality of the product. It broke within a week of use.",
+         "Today has been an average day. Nothing particularly good or bad happened.",
+         "This book is a masterpiece. The author's writing style is brilliant, and the characters are well-developed.",
+         "I'm feeling neutral about the new restaurant. The ambiance was nice, but the food was mediocre.",
+     ],
+     theme=theme,
+     allow_flagging="never",
+ )
+
+ demo.launch()
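
app.py wires the downloaded ONNX model, the tokenizer, and the calibrated postprocessing into a single predict function that the Gradio interface exposes. For a quick check without the browser UI, predict can be exercised directly; a minimal sketch, assuming demo.launch() is temporarily commented out so that importing app does not start the server (the printed output is illustrative, not from a real run):

from app import predict

print(predict("The support team was quick and helpful."))
# Illustrative output: a single-entry dict such as {"POSITIVE": 0.41}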
configuration.py ADDED
@@ -0,0 +1,8 @@
+ from pydantic_settings import BaseSettings, SettingsConfigDict
+
+
+ class Config(BaseSettings):
+     model_config = SettingsConfigDict(env_prefix="lt_")
+     negative_threshold: float = -0.65
+     positive_threshold: float = 0.37
+     zero: float = 0
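
Because Config subclasses pydantic-settings' BaseSettings with env_prefix="lt_", each threshold can be overridden through the environment without touching the code. A minimal sketch (the override value is an arbitrary example; pydantic-settings matches the prefixed names case-insensitively by default):

import os

os.environ["LT_POSITIVE_THRESHOLD"] = "0.5"  # arbitrary example value

from configuration import Config

print(Config().positive_threshold)  # 0.5, read from the environment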
onnx_model.py ADDED
@@ -0,0 +1,82 @@
+ from __future__ import annotations
+
+ import json
+ from dataclasses import dataclass
+ from pathlib import Path
+ from typing import Any
+
+ import numpy as np
+ import onnxruntime as ort
+ from loguru import logger
+ from onnxruntime.transformers.io_binding_helper import TypeHelper
+
+
+ @dataclass
+ class ModelInfo:
+     base_model: str
+
+     @classmethod
+     def from_dir(cls, model_dir: Path):
+         with open(model_dir / "metadata.json", "r", encoding="utf-8") as file:
+             data = json.load(file)
+         return ModelInfo(base_model=data["bert_type"])
+
+
+ class ONNXModel:
+     def __init__(self, model: ort.InferenceSession, model_info: ModelInfo) -> None:
+         self.model = model
+         self.model_info = model_info
+         self.model_path = Path(model._model_path)  # type: ignore
+         self.model_name = self.model_path.name
+
+         self.providers = model.get_providers()
+
+         if self.providers[0] in ["CUDAExecutionProvider", "TensorrtExecutionProvider"]:
+             self.device = "cuda"
+         else:
+             self.device = "cpu"
+
+         self.io_types = TypeHelper.get_io_numpy_type_map(model)
+
+         self.input_names = [el.name for el in model.get_inputs()]
+         self.output_name = model.get_outputs()[0].name
+
+     @staticmethod
+     def load_session(
+         path: str | Path,
+         provider: str = "CPUExecutionProvider",
+         session_options: ort.SessionOptions | None = None,
+         provider_options: dict[str, Any] | None = None,
+     ) -> ort.InferenceSession:
+         providers = [provider]
+         if provider == "TensorrtExecutionProvider":
+             providers.append("CUDAExecutionProvider")
+         elif provider == "CUDAExecutionProvider":
+             providers.append("CPUExecutionProvider")
+
+         if not isinstance(path, str):
+             path = Path(path) / "model.onnx"
+
+         providers_options = None
+         if provider_options is not None:
+             providers_options = [provider_options] + [{} for _ in range(len(providers) - 1)]
+
+         session = ort.InferenceSession(
+             str(path),
+             providers=providers,
+             sess_options=session_options,
+             provider_options=providers_options,
+         )
+         logger.info("Session loaded")
+         return session
+
+     @classmethod
+     def from_dir(cls, model_dir: str | Path) -> ONNXModel:
+         return ONNXModel(ONNXModel.load_session(model_dir), ModelInfo.from_dir(model_dir))
+
+     def __call__(self, **model_inputs: np.ndarray):
+         model_inputs = {
+             input_name: tensor.astype(self.io_types[input_name]) for input_name, tensor in model_inputs.items()
+         }
+
+         return self.model.run([self.output_name], model_inputs)[0]
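
ONNXModel wraps an onnxruntime InferenceSession: from_dir loads model.onnx plus the metadata.json sitting next to it, and __call__ casts incoming NumPy tensors to the dtypes the session expects before returning the first output. A minimal standalone sketch, assuming a hypothetical local directory model/my-run produced by utils.download_model:

from pathlib import Path

from transformers import AutoTokenizer

from onnx_model import ONNXModel

model = ONNXModel.from_dir(Path("model/my-run"))  # hypothetical path
tokenizer = AutoTokenizer.from_pretrained(model.model_info.base_model)

encoding = tokenizer(["Great product!"], truncation=True, return_tensors="np")
logits = model(**encoding)  # NumPy array holding the first session output
print(logits.shape)  # e.g. (1, 5) for the five ordinal sentiment classes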
postprocess.py ADDED
@@ -0,0 +1,76 @@
+ import numpy as np
+
+
+ def softmax(x: np.ndarray, axis=1) -> np.ndarray:
+     """
+     Computes softmax array along the specified axis.
+     """
+     # Subtracting the max keeps exp() from overflowing; softmax is shift-invariant.
+     e_x = np.exp(x - x.max(axis=axis, keepdims=True))
+     return e_x / e_x.sum(axis=axis, keepdims=True)
+
+
+ def calibrate_sentiment_score(
+     sentiment: float,
+     thresh_neg: float,
+     thresh_pos: float,
+     zero: float = 0,
+ ) -> float:
+     if thresh_neg != (zero - 1) / 2:
+         alpha_neg = -(3 * zero - 1 - 4 * thresh_neg) / (2 * zero - 2 - 4 * thresh_neg) / 2
+         if -1 < alpha_neg < 0:
+             raise ValueError(f"Incorrect value: {thresh_neg=} is too far from -0.5!")
+     if thresh_pos != (zero + 1) / 2:
+         alpha_pos = -(4 * thresh_pos - 1 - 3 * zero) / (2 + 2 * zero - 4 * thresh_pos) / 2
+         if 0 < alpha_pos < 1:
+             raise ValueError(f"Incorrect value: {thresh_pos=} is too far from 0.5!")
+     if sentiment < 0:
+         return (2 * zero - 2 - 4 * thresh_neg) * sentiment**2 + (3 * zero - 1 - 4 * thresh_neg) * sentiment + zero
+     elif sentiment > 0:
+         return (2 + 2 * zero - 4 * thresh_pos) * sentiment**2 + (4 * thresh_pos - 1 - 3 * zero) * sentiment + zero
+     return zero
+
+
+ def calibrate_sentiment(
+     sentiments: np.ndarray,
+     thresh_neg: float,
+     thresh_pos: float,
+     zero: float,
+ ) -> np.ndarray:
+     result = np.array(
+         [
+             calibrate_sentiment_score(sentiment, thresh_neg=thresh_neg, thresh_pos=thresh_pos, zero=zero)
+             for sentiment in sentiments
+         ]
+     )
+     return result.astype(np.float64)
+
+
+ def scale_value(value, in_min, in_max, out_min, out_max):
+     if in_min <= value <= in_max:
+         scaled_value = (value - in_min) / (in_max - in_min) * (out_max - out_min) + out_min
+         return round(scaled_value, 3)
+     raise ValueError(f"Input value must be in the range [{in_min}, {in_max}]")
+
+
+ def get_sentiment(
+     logits: np.ndarray,
+     thresh_neg: float,
+     thresh_pos: float,
+     zero: float,
+ ):
+     probabilities = softmax(logits, axis=1)
+     # Expected class index over the 5 ordinal labels, rescaled from [0, 4] to [-1, 1].
+     sentiments = np.matmul(probabilities, np.arange(5)) / 2 - 1
+     score = calibrate_sentiment(
+         sentiments=sentiments,
+         thresh_neg=thresh_neg,
+         thresh_pos=thresh_pos,
+         zero=zero,
+     )[0]
+     if score < -0.33:
+         return scale_value(score, -1, -0.33, 0, 1), "NEGATIVE"
+     elif score < 0.33:
+         return scale_value(score, -0.33, 0.33, 0, 1), "NEUTRAL"
+     else:
+         return scale_value(score, 0.33, 1, 0, 1), "POSITIVE"
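
The calibration is a piecewise quadratic: on the negative side the coefficients are chosen so that f(-1) = -1, f(-0.5) = thresh_neg, and f(0) = zero, and symmetrically on the positive side with thresh_pos, while the alpha checks reject thresholds that would place the parabola's vertex inside the interval and break monotonicity. A worked sketch with the defaults from configuration.py (the logits are made up; the expected output comes from hand-checking the math, not from a real run):

import numpy as np

from postprocess import get_sentiment

# Made-up logits for one sentence over the five ordinal classes
# (very negative ... very positive); shape (1, 5).
logits = np.array([[0.1, 0.2, 0.4, 2.5, 3.0]])

score, label = get_sentiment(logits, thresh_neg=-0.65, thresh_pos=0.37, zero=0.0)
print(label, score)  # hand-checked: POSITIVE with a score around 0.35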
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ transformers
+ pydantic
+ pydantic_settings
+ numpy
+ onnxruntime
+ loguru
+ mlflow
theme.py ADDED
@@ -0,0 +1,36 @@
+ import gradio as gr
+
+ _PRIMARY = gr.themes.colors.Color(
+     name="lt_1",
+     c50="#7A48E7",
+     c100="#7A48E7",
+     c200="#7A48E7",
+     c300="#7A48E7",
+     c400="#7A48E7",
+     c500="#7A48E7",
+     c600="#F7F0FF",
+     c700="#F7F0FF",
+     c800="#F7F0FF",
+     c900="#F7F0FF",
+     c950="#F7F0FF",
+ )
+
+ _SECONDARY = gr.themes.colors.Color(
+     name="lt_2",
+     c50="#F6F0FF",
+     c100="#F6F0FF",
+     c200="#F6F0FF",
+     c300="#F5F0FF",
+     c400="#F5F0FF",
+     c500="#D8CDF6",
+     c600="#D8CDF6",
+     c700="#BCADEC",
+     c800="#BCADEC",
+     c900="#9580D8",
+     c950="#9580D8",
+ )
+
+ theme = gr.themes.Default(
+     primary_hue=_PRIMARY,
+     secondary_hue=_SECONDARY,
+ )
utils.py ADDED
@@ -0,0 +1,52 @@
+ """Utils"""
+ from __future__ import annotations
+
+ import json
+ from pathlib import Path
+ from typing import Literal
+
+ from loguru import logger
+
+
+ def download_model(
+     model_name: str,
+     model_stage: Literal["staging", "production"],
+     model_dir: str | Path = "model",
+ ) -> Path:
+     """Download model from mlflow"""
+     import mlflow.artifacts
+     import mlflow.models
+     from mlflow.client import MlflowClient
+
+     logger.info(f"Looking for model {model_name}/{model_stage}")
+
+     if isinstance(model_dir, str):
+         model_dir = Path(model_dir)
+
+     client = MlflowClient()
+     model_versions = client.get_latest_versions(model_name, stages=[model_stage])
+     if len(model_versions) != 1:
+         raise ValueError(f"No model version for {model_name}/{model_stage}")
+
+     artifact_uri = model_versions[0].source
+     model_version = model_versions[0].version
+
+     logger.info(f"Found version {model_version} for {model_name}/{model_stage}")
+
+     model_path = model_dir / artifact_uri.split("/")[-1]  # type: ignore
+     if model_path.exists():
+         logger.info(f"Found model in {model_path}, skipping download")
+         return model_path
+
+     logger.info(f"Downloading artifacts {artifact_uri} to {model_dir}")
+     model_path = mlflow.artifacts.download_artifacts(artifact_uri, dst_path=str(model_dir))
+     logger.info(f"Successfully downloaded {model_name}")
+
+     model_info = mlflow.models.get_model_info(model_path)
+     metadata = model_info.metadata
+     metadata_path = Path(model_path) / "metadata.json"
+     logger.info(f"Saving metadata to {metadata_path}")
+     with open(metadata_path, "w", encoding="utf-8") as file:
+         json.dump(metadata, file)
+
+     return Path(model_path)
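
download_model resolves the latest registered version for the requested stage, downloads its artifacts once (later runs reuse the local copy), and writes the model's MLflow metadata next to it so that ModelInfo.from_dir can recover the base tokenizer name. MlflowClient() picks the registry up from the standard MLFLOW_TRACKING_URI environment variable, which therefore has to point at the server hosting the registered model; a sketch with a hypothetical URI:

import os

os.environ.setdefault("MLFLOW_TRACKING_URI", "https://mlflow.example.com")  # hypothetical server

from utils import download_model

model_path = download_model("ml-sentiment-adapter", "production")
print(model_path)  # local directory containing model.onnx and metadata.json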