sdutta28 committed on
Commit 15c875a
1 Parent(s): e00c87e

Added LIME explainability

.gitignore CHANGED
@@ -191,4 +191,8 @@ cython_debug/
 
 # Support for Project snippet scope
 
-# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,data
+# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,data
+
+static/nltk
+.vscode
+try.py
app.py CHANGED
@@ -1,7 +1,8 @@
 from components.get_predictions import get_predictions
-from gradio.components import Textbox
+from gradio.components import Textbox, IOComponent, Plot
 from gradio.interface import Interface
 from gradio.themes import Monochrome
+from components.utils import initialize
 
 
 def get_input_fields() -> Textbox:
@@ -11,23 +12,25 @@ def get_input_fields() -> Textbox:
         Textbox: Input Field as gradio TextBox
     """
     return Textbox(
-        lines=2,
+        lines=10,
         placeholder="Enter The Text",
         value="",
         label="Text to Predict",
     )
 
 
-def get_output_fields() -> list[Textbox]:
+def get_output_fields() -> list[str | IOComponent]:
     """Gets Output Fields
 
     Returns:
-        list[Textbox...]: output fields as gradio textbox
+        list[str | IOComponent]: output fields as gradio textbox
     """
 
     return [
         Textbox(type="text", label="Aggression Prediction"),
         Textbox(type="text", label="Misogyny Prediction"),
+        Plot(label="Explanation of Aggression", scale=1),
+        Plot(label="Explanation of Misogyny", scale=1),
     ]
 
 
@@ -51,6 +54,7 @@ def get_interface() -> Interface:
 
 
 if __name__ == "__main__":
+    initialize()
     interface = get_interface()
 
     # Launch the interface
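
The four output components returned by get_output_fields now have to line up with a four-element return value from get_predictions. The diff does not show the body of get_interface, so the following is only a sketch of how the pieces plausibly fit together; the function body, theme usage, and launch call are assumptions inferred from the imports and the "# Launch the interface" comment visible above.

# Sketch only: get_interface() exists in app.py (see the hunk header above), but its
# body is not part of this diff, so everything inside it here is an assumption.
# (Imports are those already at the top of app.py in the diff above.)
def get_interface() -> Interface:
    return Interface(
        fn=get_predictions,           # expected to return (label_A, label_B, fig_A, fig_B)
        inputs=get_input_fields(),    # the single Textbox defined above
        outputs=get_output_fields(),  # two Textboxes followed by the two new Plots
        theme=Monochrome(),           # assumed, based on the Monochrome import
    )


if __name__ == "__main__":
    initialize()                 # NLTK download + warning filter from components.utils
    interface = get_interface()

    # Launch the interface
    interface.launch()           # assumed launch call; hidden by the diff context
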
components/config.py CHANGED
@@ -12,6 +12,7 @@ class Settings:
         0: "NGEN - Non Misogynistic Content",
         1: "GEN - Misogynistic Content",
     }
+    NUM_EXPLAINER_FEATURES: int = 10
 
 
 app_config = Settings()
components/get_predictions.py CHANGED
@@ -1,29 +1,77 @@
 import components.utils as utils
 from components.config import app_config
-import joblib
+from components.models import (
+    pipeline_task_A,
+    pipeline_task_B,
+    explainer_task_A,
+    explainer_task_B,
+)
+from lime.lime_text import LimeTextExplainer
+from typing import Any
+from matplotlib.figure import Figure
 
 
-def get_predictions(text: str) -> tuple[str, str]:
+def predict_for_pipeline(
+    model_pipeline: Any,
+    explainer: LimeTextExplainer,
+    cleaned_data: list[str],
+    labels: list,
+) -> tuple[int, Figure | None]:
+    """Generates Prediction and Explanation given the cleaned text
+
+    Args:
+        model_pipeline (Any): Joblib imported model pipeline
+        explainer (LimeTextExplainer): text explainer
+        cleaned_data (list[str]): cleaned text
+        labels(list): list of integers as labels
+
+    Returns:
+        tuple[int, Figure]: class prediction and LIME explanation as matplotlib figure
+    """
+
+    explanation = explainer.explain_instance(
+        cleaned_data[0],
+        model_pipeline.predict_proba,
+        num_features=app_config.NUM_EXPLAINER_FEATURES,
+        labels=labels,
+    )
+
+    class_prediction = model_pipeline.predict(cleaned_data)[0]
+    return class_prediction, explanation.as_pyplot_figure(label=1)
+
+
+def get_predictions(text: str) -> tuple:
     """Gets Predictions for the Texts
 
     Args:
         text (str): The input text to get predictions for
 
     Returns:
-        tuple[str, str]: Predictions for task A and task B
+        tuple[str, Any]: Predictions for task A and task B
+        along with Figures
     """
 
     cleaned_data = [utils.clean_one_text(text)]
 
-    # Load Models
-    model_1 = joblib.load(app_config.TASK_A_MODEL_PATH)
-    model_2 = joblib.load(app_config.TASK_B_MODEL_PATH)
+    prediction_task_A = predict_for_pipeline(
+        pipeline_task_A,
+        explainer_task_A,
+        cleaned_data,
+        [0, 1, 2],
+    )
+    prediction_task_B = predict_for_pipeline(
+        pipeline_task_B,
+        explainer_task_B,
+        cleaned_data,
+        [0, 1],
+    )
 
-    # Predictions
-    pred_1 = model_1.predict(cleaned_data)[0]
-    pred_2 = model_2.predict(cleaned_data)[0]
+    print(prediction_task_A)
+    print(prediction_task_B)
 
     return (
-        app_config.TASK_A_MAP[pred_1],
-        app_config.TASK_B_MAP[pred_2],
+        app_config.TASK_A_MAP[prediction_task_A[0]],
+        app_config.TASK_B_MAP[prediction_task_B[0]],
+        prediction_task_A[1],
+        prediction_task_B[1],
    )
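
The heart of the change is predict_for_pipeline: explain_instance perturbs the input text, queries the pipeline's predict_proba, and returns per-token weights that as_pyplot_figure turns into a matplotlib Figure for the new Plot outputs. Below is a self-contained sketch of the same pattern against a toy scikit-learn pipeline; the corpus, labels, and model choice are illustrative stand-ins, not the project's persisted task pipelines.

# Toy end-to-end LIME example; only the lime/scikit-learn calls mirror the diff above.
from lime.lime_text import LimeTextExplainer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

texts = ["have a lovely day", "you are awful and stupid"] * 20   # placeholder corpus
labels = [0, 1] * 20                                             # placeholder labels

# A pipeline mapping raw strings to class probabilities -- the contract that
# explain_instance needs from its classifier_fn argument.
pipeline = make_pipeline(TfidfVectorizer(), LogisticRegression())
pipeline.fit(texts, labels)

explainer = LimeTextExplainer()
explanation = explainer.explain_instance(
    "you are awful",             # single cleaned string, like cleaned_data[0]
    pipeline.predict_proba,      # classifier_fn
    num_features=10,             # plays the role of app_config.NUM_EXPLAINER_FEATURES
    labels=[0, 1],
)

prediction = pipeline.predict(["you are awful"])[0]
fig = explanation.as_pyplot_figure(label=1)   # matplotlib Figure handed to Gradio's Plot
fig.savefig("lime_explanation.png")           # illustrative; the app returns the Figure instead
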
components/models.py ADDED
@@ -0,0 +1,12 @@
+import joblib
+from components.config import app_config
+from lime.lime_text import LimeTextExplainer
+
+
+# Takes in a string and outputs list
+pipeline_task_A = joblib.load(app_config.TASK_A_MODEL_PATH)
+pipeline_task_B = joblib.load(app_config.TASK_B_MODEL_PATH)
+
+# LIME text explainer
+explainer_task_A: LimeTextExplainer = LimeTextExplainer()
+explainer_task_B: LimeTextExplainer = LimeTextExplainer()
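
Because these joblib.load calls sit at module level, both pipelines and both explainers are built exactly once per process, when components.models is first imported; app.py and get_predictions.py then share the same objects. If import-time loading ever became a start-up concern, one alternative is lazy loading behind a cache; the snippet below is a sketch of that option only, not something this commit does.

# Illustrative lazy-loading variant; names mirror the module above but the approach
# is an alternative, not part of this commit.
from functools import lru_cache

import joblib
from components.config import app_config
from lime.lime_text import LimeTextExplainer


@lru_cache(maxsize=None)
def get_pipeline_task_A():
    # Deserialized on first use, then cached for every later prediction.
    return joblib.load(app_config.TASK_A_MODEL_PATH)


@lru_cache(maxsize=None)
def get_explainer_task_A() -> LimeTextExplainer:
    return LimeTextExplainer()
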
components/utils.py CHANGED
@@ -1,8 +1,8 @@
 import string
 import nltk
 import re
-
-nltk.download("stopwords")
+from nltk.stem.porter import PorterStemmer
+import warnings
 
 
 # Cleans one text
@@ -32,8 +32,17 @@ def clean_one_text(text: str) -> str:
 
     s.difference_update(not_words)
 
-    stmr = nltk.stem.porter.PorterStemmer()
+    stmr = PorterStemmer()
     tokens = [token for token in tk.tokenize(new_string) if token.lower() not in s]
     clean_tokens = [stmr.stem(token) for token in tokens]
     text = " ".join(clean_tokens)
     return text
+
+
+def setup_nltk():
+    nltk.download("stopwords")
+
+
+def initialize():
+    warnings.filterwarnings("ignore")
+    setup_nltk()
requirements.txt CHANGED
@@ -36,11 +36,14 @@ httpcore==0.17.2
 httpx==0.24.1
 huggingface-hub==0.15.1
 idna==3.4
+imageio==2.31.1
 itsdangerous==2.1.2
 Jinja2==3.1.2
 joblib==1.2.0
 jsonschema==4.17.3
 kiwisolver==1.4.4
+lazy_loader==0.2
+lime==0.2.0.1
 linkify-it-py==2.0.2
 markdown-it-py==2.2.0
 markdown2==2.4.8
@@ -51,6 +54,7 @@ mdurl==0.1.2
 monotonic==1.6
 multidict==6.0.4
 mypy-extensions==1.0.0
+networkx==3.1
 nltk==3.8.1
 numpy==1.24.3
 orjson==3.9.1
@@ -71,9 +75,11 @@ pyrsistent==0.19.3
 python-dateutil==2.8.2
 python-multipart==0.0.6
 pytz==2023.3
+PyWavelets==1.4.1
 PyYAML==6.0
 regex==2023.6.3
 requests==2.31.0
+scikit-image==0.21.0
 scikit-learn==1.2.2
 scipy==1.10.1
 semantic-version==2.10.0
@@ -81,6 +87,7 @@ six==1.16.0
 sniffio==1.3.0
 starlette==0.27.0
 threadpoolctl==3.1.0
+tifffile==2023.4.12
 tomli==2.0.1
 toolz==0.12.0
 tqdm==4.65.0
  tqdm==4.65.0