Spaces:

cooldragon12
/

multitask-classifying-emotion-toxicity-valorant-chat

Sleeping

App Files Community

cooldragon12 committed on Jun 13

Commit

f67c2de

•

1 Parent(s): 3d200ef

Added Multilingual BERT Base

Browse files

Files changed (12) hide show

.gitignore +1 -2
README.md +3 -0
app.py +39 -34
model_with_bert_multilingual.h5 +3 -0
pipeline/model/__init__.py +1 -2
pipeline/model/__pycache__/__init__.cpython-311.pyc +0 -0
pipeline/model/model_with_bert_multilingual.py +25 -1
pipeline/preprocessing/__init__.py +3 -30
pipeline/preprocessing/__pycache__/__init__.cpython-311.pyc +0 -0
pipeline/preprocessing/constants.py +18 -0
pipeline/preprocessing/decoder.py +14 -0
pipeline/preprocessing/tokenizer.py +15 -0

.gitignore CHANGED Viewed

@@ -1,3 +1,2 @@
 **/__pycache__/
-**venv/
-model_with_bert_multilingual.h5


1	**/__pycache__/
2	+ **venv/

README.md CHANGED Viewed

@@ -1,5 +1,6 @@
 ---
 title: Multitask Classifying Emotion Toxicity Valorant Chat
 emoji: 📊
 colorFrom: blue
 colorTo: purple
@@ -10,4 +11,6 @@ pinned: false
 license: mit
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Multitask Classifying Emotion Toxicity Valorant Chat
+summary: This space contains a Streamlit app that classifies the toxicity and emotion of Valorant chat messages using the ChattyTicket model.
 emoji: 📊
 colorFrom: blue
 colorTo: purple
 license: mit
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,66 +1,71 @@
 import streamlit as st
 from pipeline.model import MultiTaskModel
-from pipeline.preprocessing import Preprocessor
 # Load the model
-EMOTION_CHOICES = (
-    "Angry",
-    "Disgust",
-    "Happy",
-    "Neutral",
-    "Sad",
-    "Surprise",
-)
-TOXICITY_CHOICES = (
-    "Blaming Others",
-    "Cyberbullying",
-    "Gameplay Experience Complaints",
-    "Gamesplaining",
-    "Multiple Discrimination",
-    "Not Toxic",
-    "Sarcasm",
-)
-st.title("Emotion and Toxicity Classification of Valorant chat messages")
-st.write(
-    'This is a simple web app that predicts the emotion and toxicity of Valorant chat messages. Enter a message in the text box below and click the "Predict" button to get the prediction.'
 )
 st.table(
     {
-        "Emotion": EMOTION_CHOICES,
-        "Toxicity": TOXICITY_CHOICES,
     }
 )
 @st.cache_resource
-def loading_model():
-    return MultiTaskModel(preprocessor=Preprocessor())
-model = loading_model()
 # Get user input
 user_input = st.text_input("Enter a Valorant chat message:")
 st.write("You entered:", user_input)
 # Predict
 prediction = model.predict(user_input)
 emotions, toxicitys = prediction
 col1, col2 = st.columns(2)
 with col1:
     for i, emotion in enumerate(emotions[0]):
-        st.write(f"{EMOTION_CHOICES[i]}: {(emotion*100):.2f}%")
         st.progress(float(emotion))
 with col2:
     for i, toxicity in enumerate(toxicitys[0]):
-        st.write(f"{TOXICITY_CHOICES[i]}: {(toxicity*100):.2f}%")
         st.progress(float(toxicity))
-decoded = model.decode(prediction)
-# Display the prediction
-st.write("The predicted emotion is:", decoded[0][0])
-st.write("The predicted toxicity is:", decoded[1][0])

 import streamlit as st
 from pipeline.model import MultiTaskModel
+from pipeline.preprocessing import Preprocessor, EMOTION_LABELS, TOXICITY_LABELS
 # Load the model
+st.title("ChattyTicket Model: Emotion and Toxicity Classification of Valorant chat messages")
+st.write("""Discover our innovative model designed to tackle toxicity in Valorant, a popular multiplayer game. Leveraging advanced multi-task learning, our model combines Bi-LSTM and BERT architectures for superior performance. This approach enables accurate classification of toxic conversations and emotional content within the game.
+Key Features:
+ - Multi-Task Learning: Simultaneous classification of toxicity and emotion.
+ - Bi-LSTM & BERT Integration: Enhanced accuracy with a BERT pre-trained backbone.
+ - High Accuracy: 91.81% in toxicity detection and 86.74% in emotion prediction.
+Insights & Impact:
+ - Emotional Landscape: Identifies prevalent emotions like anger and instances of cyberbullying.
+ - Healthier Communities: Provides insights for fostering positive gaming environments.
+ChattyTicket API:
+    - Evaluate chat text for emotion and toxicity with our user-friendly API. Positive user feedback highlights its effectiveness and interactivity, with ongoing improvements based on user input to better handle nuances like sarcasm.
+Experience the future of online gaming moderation with our model and contribute to a healthier, more enjoyable gaming community.
+         """
 )
 st.table(
     {
+        "Emotion": EMOTION_LABELS,
+        "Toxicity": TOXICITY_LABELS,
     }
 )
 @st.cache_resource
+def loading_model(bert_base):
+    if bert_base == "bert-base-uncased":
+        print("Loading base model")
+        return MultiTaskModel(preprocessor=Preprocessor())
+    elif bert_base == "bert-base-multilingual-cased":
+        print("Loading multilingual model")
+        return MultiTaskModel(is_multilingual=True, preprocessor=Preprocessor(is_multilingual=True))
+    else:
+        return None
+def clear():
+    loading_model.clear()
+bert_base = st.selectbox("Select a model", ("bert-base-uncased", "bert-base-multilingual-cased"), placeholder="Select a model", on_change=clear)
+model = loading_model(bert_base)
 # Get user input
 user_input = st.text_input("Enter a Valorant chat message:")
 st.write("You entered:", user_input)
 # Predict
 prediction = model.predict(user_input)
 emotions, toxicitys = prediction
+decoded = model.decode(prediction)
 col1, col2 = st.columns(2)
 with col1:
+    st.write(f"The predicted emotion is: {decoded[0][0][0]}" )
     for i, emotion in enumerate(emotions[0]):
+        st.write(f"{EMOTION_LABELS[i]}: {(emotion*100):.2f}%")
         st.progress(float(emotion))
 with col2:
+    st.write(f"The predicted toxicity is: {decoded[1][0][0]} ")
     for i, toxicity in enumerate(toxicitys[0]):
+        st.write(f"{TOXICITY_LABELS[i]}: {(toxicity*100):.2f}%")
         st.progress(float(toxicity))

model_with_bert_multilingual.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:50a1ae46552fba31db5adea05b4edc300eb9c22a3932fbbf44e7ebeeaa883774
+size 2141328712

pipeline/model/__init__.py CHANGED Viewed

@@ -1,5 +1,3 @@
-import tensorflow as tf
 from tensorflow.keras.models import load_model
 from  transformers import TFBertModel
@@ -21,6 +19,7 @@ class MultiTaskModel:
     def predict(self, text):
         preptext= self.preprocessor.preprocess_text(text)
         return self.model.predict(preptext)
     def decode(self, pred):

 from tensorflow.keras.models import load_model
 from  transformers import TFBertModel
     def predict(self, text):
         preptext= self.preprocessor.preprocess_text(text)
+        print(self.model)
         return self.model.predict(preptext)
     def decode(self, pred):

pipeline/model/__pycache__/__init__.cpython-311.pyc CHANGED Viewed

Binary files a/pipeline/model/__pycache__/__init__.cpython-311.pyc and b/pipeline/model/__pycache__/__init__.cpython-311.pyc differ

pipeline/model/model_with_bert_multilingual.py CHANGED Viewed

	@@ -7,4 +7,28 @@ from tensorflow.keras.regularizers import l1_l2 # type: ignore
7
8
9	def build_model_multilingual(max_length = 65, layer = 40,dropout = 0.69, l2_lstm = 0.01, learning_rate = 1e-4):
10	-

 def build_model_multilingual(max_length = 65, layer = 40,dropout = 0.69, l2_lstm = 0.01, learning_rate = 1e-4):
+    bert = TFBertModel.from_pretrained('bert-base-multilingual-cased')
+    # Model definition inside the loop
+    input_ids = Input(shape=(max_length,), dtype=tf.int32, name='input_ids')
+    bert_output = bert(input_ids)[0] # type: ignore
+    bi_lstm_emotion = Bidirectional(LSTM(layer, dropout=dropout, kernel_regularizer=l1_l2(l2_lstm*0.15,l2_lstm)))(bert_output)
+    bi_lstm_toxicity = Bidirectional(LSTM(layer, dropout=dropout, kernel_regularizer=l1_l2(l2_lstm*0.2,l2_lstm)))(bert_output) # outputs
+    output_emotion = Dense(6, activation='softmax', name='emotion_output')(bi_lstm_emotion)
+    output_toxicity = Dense(7, activation='softmax', name='toxicity_output')(bi_lstm_toxicity)
+    model = Model(inputs=input_ids, outputs=[output_emotion, output_toxicity])
+                    # # Compile
+                    # model = create_multitask_model_with_bert(y_toxicity, y_emotion, TFBertModel, max_length, lstm_dropout=0.2, layers=lstm_layers)
+    model.compile(
+        optimizer=Adam(learning_rate=learning_rate),
+        loss={'emotion_output': 'categorical_crossentropy', 'toxicity_output': 'categorical_crossentropy'},
+        metrics={
+            'emotion_output': ['accuracy', tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall'), tf.keras.metrics.AUC(name='em_auc', multi_label=True), tf.keras.metrics.F1Score(name='f1_score')], # type: ignore
+            'toxicity_output': ['accuracy', tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall'), tf.keras.metrics.AUC(name='to_auc', multi_label=True), tf.keras.metrics.F1Score(name='f1_score')], # type: ignore
+        }
+    )
+    return model

pipeline/preprocessing/__init__.py CHANGED Viewed

@@ -1,30 +1,3 @@
-from transformers import BertTokenizer
-class Decoder:
-    def __init__(self):
-        import pickle
-        with open('pipeline/preprocessing/encoder_toxicity.pkl', 'rb') as f:
-            self.__encoder_toxicity = pickle.load(f)
-        with open('pipeline/preprocessing/encoder_emotion.pkl', 'rb') as f:
-            self.__encoder_emotion = pickle.load(f)
-    # Decoding one-hot encoded labels
-    def toxicity(self,pred):
-        return self.__encoder_toxicity.inverse_transform(pred)
-    def emotion(self,pred):
-        return self.__encoder_emotion.inverse_transform(pred)
-class Preprocessor:
-    """A class used to represent a Preprocessor, which preprocesses text data for the model"""
-    def __init__(self, is_multilingual = False):
-        if is_multilingual:
-            self.tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
-        else:
-            self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
-        self.decoder = Decoder()
-        """Added a decoder object to the Preprocessor class to decode the one-hot encoded labels"""
-    def preprocess_text(self,text):
-        return self.tokenizer.encode(text,add_special_tokens=True, max_length=65,
-        padding="max_length", truncation=True, return_attention_mask=False, return_tensors='tf')

+from .constants import EMOTION_LABELS, TOXICITY_LABELS
+from .decoder import Decoder
+from .tokenizer import Preprocessor

pipeline/preprocessing/__pycache__/__init__.cpython-311.pyc CHANGED Viewed

Binary files a/pipeline/preprocessing/__pycache__/__init__.cpython-311.pyc and b/pipeline/preprocessing/__pycache__/__init__.cpython-311.pyc differ

pipeline/preprocessing/constants.py ADDED Viewed

	@@ -0,0 +1,18 @@

+EMOTION_LABELS = (
+    "Angry",
+    "Disgust",
+    "Happy",
+    "Neutral",
+    "Sad",
+    "Surprise",
+)
+TOXICITY_LABELS = (
+    "Blaming Others",
+    "Cyberbullying",
+    "Gameplay Experience Complaints",
+    "Gamesplaining",
+    "Multiple Discrimination",
+    "Not Toxic",
+    "Sarcasm",
+)

pipeline/preprocessing/decoder.py ADDED Viewed

	@@ -0,0 +1,14 @@

+class Decoder:
+    def __init__(self):
+        import pickle
+        with open('pipeline/preprocessing/encoder_toxicity.pkl', 'rb') as v:
+            self.__encoder_toxicity = pickle.load(v)
+        with open('pipeline/preprocessing/encoder_emotion.pkl', 'rb') as v:
+            self.__encoder_emotion = pickle.load(v)
+    # Decoding one-hot encoded labels
+    def toxicity(self,pred):
+        return self.__encoder_toxicity.inverse_transform(pred)
+    def emotion(self,pred):
+        return self.__encoder_emotion.inverse_transform(pred)

pipeline/preprocessing/tokenizer.py ADDED Viewed

	@@ -0,0 +1,15 @@

+from transformers import BertTokenizer
+from .decoder import Decoder
+class Preprocessor:
+    """A class used to represent a Preprocessor, which preprocesses text data for the model"""
+    def __init__(self, is_multilingual = False):
+        if is_multilingual:
+            self.tokenizer = BertTokenizer.from_pretrained('bert-base-multilingual-cased')
+        else:
+            self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
+        self.decoder = Decoder()
+        """Added a decoder object to the Preprocessor class to decode the one-hot encoded labels"""
+    def preprocess_text(self,text):
+        return self.tokenizer.encode(text,add_special_tokens=True, max_length=65,
+        padding="max_length", truncation=True, return_attention_mask=False, return_tensors='tf')