cooldragon12 committed on
Commit
acf980a
1 Parent(s): 31ab68a

upload app file

Browse files
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ **/__pycache__/
2
+ **venv/
3
+ model_with_bert_multilingual.h5
README.md CHANGED
@@ -1,13 +1,13 @@
1
- ---
2
- title: Multitask Classifying Emotion Toxicity Valorant Chat
3
- emoji: 📊
4
- colorFrom: blue
5
- colorTo: purple
6
- sdk: streamlit
7
- sdk_version: 1.34.0
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Multitask Classifying Emotion Toxicity Valorant Chat
3
+ emoji: 📊
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: streamlit
7
+ sdk_version: 1.34.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Streamlit front end for the Valorant chat emotion/toxicity classifier."""
import streamlit as st

from pipeline.model import MultiTaskModel
from pipeline.preprocessing import Preprocessor

# Display names for the model's output classes. Position i in each tuple is
# used as the label for score i of the corresponding model output below.
EMOTION_CHOICES = (
    "Angry",
    "Disgust",
    "Happy",
    "Neutral",
    "Sad",
    "Surprise",
)
TOXICITY_CHOICES = (
    "Blaming Others",
    "Cyberbullying",
    "Gameplay Experience Complaints",
    "Gamesplaining",
    "Multiple Discrimination",
    "Not Toxic",
    "Sarcasm",
)

st.title("Emotion and Toxicity Classification of Valorant chat messages")

st.write(
    'This is a simple web app that predicts the emotion and toxicity of Valorant chat messages. Enter a message in the text box below and click the "Predict" button to get the prediction.'
)

# NOTE(review): the two tuples have different lengths (6 vs 7); confirm that
# st.table renders this mapping the way it is intended to look.
st.table(
    {
        "Emotion": EMOTION_CHOICES,
        "Toxicity": TOXICITY_CHOICES,
    }
)


@st.cache_resource
def loading_model():
    """Build the classifier once and let Streamlit reuse it across reruns."""
    return MultiTaskModel(preprocessor=Preprocessor())


model = loading_model()

# Get user input
user_input = st.text_input("Enter a Valorant chat message:")
st.write("You entered:", user_input)

# Only run inference once the user has actually typed something; the text box
# is empty on first render and a prediction for "" is meaningless.
if user_input.strip():
    prediction = model.predict(user_input)
    emotions, toxicitys = prediction

    col1, col2 = st.columns(2)

    # One labelled progress bar per class; index 0 selects the single message
    # in the batch.
    with col1:
        for label, score in zip(EMOTION_CHOICES, emotions[0]):
            st.write(f"{label}: {(score*100):.2f}%")
            st.progress(float(score))
    with col2:
        for label, score in zip(TOXICITY_CHOICES, toxicitys[0]):
            st.write(f"{label}: {(score*100):.2f}%")
            st.progress(float(score))

    # MultiTaskModel.decode returns (toxicity labels, emotion labels) -- note
    # that this is the reverse of the (emotion, toxicity) order of `prediction`.
    toxicity_labels, emotion_labels = model.decode(prediction)

    # Display the prediction
    st.write("The predicted emotion is:", emotion_labels[0])
    st.write("The predicted toxicity is:", toxicity_labels[0])
pipeline/__pycache__/preprocessing.cpython-311.pyc ADDED
Binary file (720 Bytes). View file
 
pipeline/model/__init__.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tensorflow as tf
2
+
3
+ from tensorflow.keras.models import load_model
4
+ from transformers import TFBertModel
5
+
6
class MultiTaskModel:
    """
    A class used to represent a MultiTaskModel, which classifies the emotion and toxicity of Valorant chat messages

    The Keras model is loaded from ``model_with_bert_multilingual.h5`` or
    ``model_with_bert_base.h5`` (paths relative to the current working
    directory). A preprocessor object supplies tokenization
    (``preprocess_text``) and label decoding (``decoder``).
    """
    def __init__(self, is_multilingual = False, preprocessor = None):
        """
        Load the saved Keras model and attach the preprocessor.

        Args:
            is_multilingual: when true, load the multilingual checkpoint and
                model file instead of the base ones.
            preprocessor: object providing ``preprocess_text(text)`` and a
                ``decoder`` with ``toxicity(pred)`` / ``emotion(pred)``.
                May be ``None`` and attached later via ``load_preprocess``.
        """
        # NOTE(review): the two branches pass different things as the
        # 'TFBertModel' custom object (the class vs. a loaded instance).
        # Kept as-is to preserve behavior; confirm which one is intended.
        if is_multilingual:
            pre_model = TFBertModel.from_pretrained('bert-base-multilingual-cased')
            self.model = load_model('model_with_bert_multilingual.h5', custom_objects={'TFBertModel': TFBertModel})
        else:
            pre_model = TFBertModel.from_pretrained('bert-base-uncased')
            self.model = load_model('model_with_bert_base.h5', custom_objects={'TFBertModel': pre_model})
        self.load_preprocess(preprocessor)

    def load_preprocess(self, prep):
        """Attach (or replace) the preprocessor used by predict and decode."""
        self.preprocessor = prep

    def _require_preprocessor(self):
        """Return the attached preprocessor, failing clearly if none is set."""
        if self.preprocessor is None:
            raise RuntimeError(
                "MultiTaskModel has no preprocessor; pass preprocessor=... "
                "to the constructor or call load_preprocess() first."
            )
        return self.preprocessor

    def predict(self, text):
        """
        Preprocess ``text`` and return the underlying model's prediction.

        The return value is whatever ``self.model.predict`` returns;
        ``decode`` indexes it as ``pred[0]`` = emotion, ``pred[1]`` = toxicity.

        Raises:
            RuntimeError: if no preprocessor has been attached.
        """
        preptext = self._require_preprocessor().preprocess_text(text)
        return self.model.predict(preptext)

    def decode(self, pred):
        """
        Decode a prediction into labels.

        Returns:
            A tuple ``(toxicity_labels, emotion_labels)``. Toxicity comes
            FIRST, i.e. the reverse of the order in ``pred``.

        Raises:
            RuntimeError: if no preprocessor has been attached.
        """
        decoder = self._require_preprocessor().decoder
        return decoder.toxicity(pred[1]), decoder.emotion(pred[0])
pipeline/model/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (2.38 kB). View file
 
pipeline/model/__pycache__/model_with_bert_base.cpython-311.pyc ADDED
Binary file (3.42 kB). View file
 
pipeline/model/__pycache__/model_with_bert_multilingual.cpython-311.pyc ADDED
Binary file (3.42 kB). View file
 
pipeline/model/model_with_bert_base.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import TFBertModel
2
+ import tensorflow as tf
3
+ from tensorflow.keras.layers import Input, Dense, LSTM, Bidirectional # type: ignore
4
+ from tensorflow.keras.models import Model # type: ignore
5
+ from tensorflow.keras.optimizers import Adam # type: ignore
6
+ from tensorflow.keras.regularizers import l1_l2 # type: ignore
7
+
8
def build_model(max_length = 65, layer = 40,dropout = 0.69, l2_lstm = 0.01, learning_rate = 1e-4, bert_name = 'bert-base-cased')-> Model:
    """
    Build and compile the two-headed (emotion + toxicity) BERT/BiLSTM model.

    A pretrained BERT encoder feeds two independent bidirectional LSTM
    branches, each followed by a softmax layer: 6 classes for emotion and
    7 classes for toxicity.

    Args:
        max_length: length of the ``input_ids`` sequence the model accepts.
        layer: number of units in each LSTM.
        dropout: ``dropout`` value passed to both LSTM layers.
        l2_lstm: L2 factor of the LSTM kernel regularizer; the L1 factor is
            derived from it (x0.15 for emotion, x0.2 for toxicity).
        learning_rate: learning rate for the Adam optimizer.
        bert_name: pretrained checkpoint name passed to
            ``TFBertModel.from_pretrained``. Defaults to the previously
            hard-coded ``'bert-base-cased'``.

    Returns:
        The compiled Keras model with outputs ``emotion_output`` and
        ``toxicity_output`` (in that order).
    """
    # NOTE(review): the serving code in this repo tokenizes with
    # 'bert-base-uncased'; confirm the checkpoint used here matches the
    # tokenizer's vocabulary.
    bert = TFBertModel.from_pretrained(bert_name)

    input_ids = Input(shape=(max_length,), dtype=tf.int32, name='input_ids')
    # First element of the BERT output (presumably the per-token hidden
    # states -- confirm), consumed by the LSTMs as a sequence.
    bert_output = bert(input_ids)[0] # type: ignore

    # Separate recurrent branch per task; they differ only in the L1 factor.
    bi_lstm_emotion = Bidirectional(LSTM(layer, dropout=dropout, kernel_regularizer=l1_l2(l2_lstm*0.15,l2_lstm)))(bert_output)
    bi_lstm_toxicity = Bidirectional(LSTM(layer, dropout=dropout, kernel_regularizer=l1_l2(l2_lstm*0.2,l2_lstm)))(bert_output)

    output_emotion = Dense(6, activation='softmax', name='emotion_output')(bi_lstm_emotion)
    output_toxicity = Dense(7, activation='softmax', name='toxicity_output')(bi_lstm_toxicity)

    model = Model(inputs=input_ids, outputs=[output_emotion, output_toxicity])

    # Loss and metrics are keyed by output-layer name. The AUC metrics get
    # distinct names per head ('em_auc' / 'to_auc').
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss={'emotion_output': 'categorical_crossentropy', 'toxicity_output': 'categorical_crossentropy'},
        metrics={
            'emotion_output': ['accuracy', tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall'), tf.keras.metrics.AUC(name='em_auc', multi_label=True), tf.keras.metrics.F1Score(name='f1_score')], # type: ignore
            'toxicity_output': ['accuracy', tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall'), tf.keras.metrics.AUC(name='to_auc', multi_label=True), tf.keras.metrics.F1Score(name='f1_score')], # type: ignore
        }
    )

    return model
pipeline/model/model_with_bert_multilingual.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import TFBertModel
2
+ import tensorflow as tf
3
+ from tensorflow.keras.layers import Input, Dense, LSTM, Bidirectional # type: ignore
4
+ from tensorflow.keras.models import Model # type: ignore
5
+ from tensorflow.keras.optimizers import Adam # type: ignore
6
+ from tensorflow.keras.regularizers import l1_l2 # type: ignore
7
+
8
+
9
def build_model_multilingual(max_length = 65, layer = 40,dropout = 0.69, l2_lstm = 0.01, learning_rate = 1e-4):
    """
    Build and compile the multilingual emotion + toxicity model.

    The original definition had no body, which made this module fail to
    import. This implementation uses the same architecture as the base
    builder in ``model_with_bert_base.py`` but with the
    ``'bert-base-multilingual-cased'`` checkpoint.
    NOTE(review): the architecture is assumed to match the base builder
    because the signatures are identical -- confirm against the training
    code that produced ``model_with_bert_multilingual.h5``.

    Args:
        max_length: length of the ``input_ids`` sequence the model accepts.
        layer: number of units in each LSTM.
        dropout: ``dropout`` value passed to both LSTM layers.
        l2_lstm: L2 factor of the LSTM kernel regularizer; the L1 factor is
            derived from it (x0.15 for emotion, x0.2 for toxicity).
        learning_rate: learning rate for the Adam optimizer.

    Returns:
        The compiled Keras model with outputs ``emotion_output`` and
        ``toxicity_output`` (in that order).
    """
    bert = TFBertModel.from_pretrained('bert-base-multilingual-cased')

    input_ids = Input(shape=(max_length,), dtype=tf.int32, name='input_ids')
    bert_output = bert(input_ids)[0] # type: ignore

    # Separate recurrent branch per task; they differ only in the L1 factor.
    bi_lstm_emotion = Bidirectional(LSTM(layer, dropout=dropout, kernel_regularizer=l1_l2(l2_lstm*0.15,l2_lstm)))(bert_output)
    bi_lstm_toxicity = Bidirectional(LSTM(layer, dropout=dropout, kernel_regularizer=l1_l2(l2_lstm*0.2,l2_lstm)))(bert_output)

    output_emotion = Dense(6, activation='softmax', name='emotion_output')(bi_lstm_emotion)
    output_toxicity = Dense(7, activation='softmax', name='toxicity_output')(bi_lstm_toxicity)

    model = Model(inputs=input_ids, outputs=[output_emotion, output_toxicity])
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss={'emotion_output': 'categorical_crossentropy', 'toxicity_output': 'categorical_crossentropy'},
        metrics={
            'emotion_output': ['accuracy', tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall'), tf.keras.metrics.AUC(name='em_auc', multi_label=True), tf.keras.metrics.F1Score(name='f1_score')], # type: ignore
            'toxicity_output': ['accuracy', tf.keras.metrics.Precision(name='precision'), tf.keras.metrics.Recall(name='recall'), tf.keras.metrics.AUC(name='to_auc', multi_label=True), tf.keras.metrics.F1Score(name='f1_score')], # type: ignore
        }
    )

    return model
pipeline/preprocessing/__init__.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import BertTokenizer
2
+
3
class Decoder:
    """Turn model output arrays back into label values via pickled encoders.

    Two encoder objects are unpickled from files that live next to this
    module (``encoder_toxicity.pkl`` and ``encoder_emotion.pkl``); each must
    provide ``inverse_transform``.
    """

    def __init__(self):
        """Load both label encoders from this package's directory."""
        import pickle
        from pathlib import Path

        # Resolve the files relative to this module rather than the current
        # working directory, so loading works no matter where the app is
        # launched from.
        package_dir = Path(__file__).resolve().parent

        # NOTE: pickle.load can execute arbitrary code. These files ship with
        # the repository; never point this at untrusted data.
        with open(package_dir / 'encoder_toxicity.pkl', 'rb') as f:
            self.__encoder_toxicity = pickle.load(f)
        with open(package_dir / 'encoder_emotion.pkl', 'rb') as f:
            self.__encoder_emotion = pickle.load(f)

    # Decoding one-hot encoded labels
    def toxicity(self,pred):
        """Return the toxicity labels for ``pred`` via the encoder's ``inverse_transform``."""
        return self.__encoder_toxicity.inverse_transform(pred)

    def emotion(self,pred):
        """Return the emotion labels for ``pred`` via the encoder's ``inverse_transform``."""
        return self.__encoder_emotion.inverse_transform(pred)
17
+
18
class Preprocessor:
    """Prepares raw chat text for the model and carries the label decoder."""

    def __init__(self, is_multilingual = False):
        """Load the BERT tokenizer for the chosen variant and create a Decoder."""
        checkpoint = 'bert-base-multilingual-cased' if is_multilingual else 'bert-base-uncased'
        self.tokenizer = BertTokenizer.from_pretrained(checkpoint)
        # The decoder lives on the preprocessor so callers can map the
        # model's encoded label outputs back to labels.
        self.decoder = Decoder()

    def preprocess_text(self,text):
        """Encode ``text`` with the tokenizer, padded/truncated to 65 tokens, as a TensorFlow tensor."""
        encode_options = dict(
            add_special_tokens=True,
            max_length=65,
            padding="max_length",
            truncation=True,
            return_attention_mask=False,
            return_tensors='tf',
        )
        return self.tokenizer.encode(text, **encode_options)
pipeline/preprocessing/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (2.98 kB). View file
 
pipeline/preprocessing/encoder_emotion.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ba4d4b26a861d6e86d8e0af68999029462ef7370d0dce662624c3567b1fd2eb
3
+ size 660
pipeline/preprocessing/encoder_toxicity.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3bd0fc9b40367e23383e557a1fce50e640e9e7efeee8fbf1faca61a3f12fee9
3
+ size 738
requirements.txt ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==2.1.0
2
+ altair==5.3.0
3
+ astunparse==1.6.3
4
+ attrs==23.2.0
5
+ blinker==1.8.2
6
+ cachetools==5.3.3
7
+ certifi==2024.2.2
8
+ charset-normalizer==3.3.2
9
+ click==8.1.7
10
+ colorama==0.4.6
11
+ filelock==3.14.0
12
+ flatbuffers==24.3.25
13
+ fsspec==2024.5.0
14
+ gast==0.5.4
15
+ gitdb==4.0.11
16
+ GitPython==3.1.43
17
+ google-auth==2.29.0
18
+ google-auth-oauthlib==1.2.0
19
+ google-pasta==0.2.0
20
+ grpcio==1.63.0
21
+ h5py==3.11.0
22
+ huggingface-hub==0.23.0
23
+ idna==3.7
24
+ Jinja2==3.1.4
25
+ joblib==1.4.2
26
+ jsonschema==4.22.0
27
+ jsonschema-specifications==2023.12.1
28
+ keras==2.15.0
29
+ libclang==18.1.1
30
+ Markdown==3.6
31
+ markdown-it-py==3.0.0
32
+ MarkupSafe==2.1.5
33
+ mdurl==0.1.2
34
+ ml-dtypes==0.2.0
35
+ namex==0.0.8
36
+ numpy==1.26.4
37
+ oauthlib==3.2.2
38
+ opt-einsum==3.3.0
39
+ optree==0.11.0
40
+ packaging==24.0
41
+ pandas==2.2.2
42
+ pillow==10.3.0
43
+ protobuf==4.25.3
44
+ pyarrow==16.1.0
45
+ pyasn1==0.6.0
46
+ pyasn1_modules==0.4.0
47
+ pydeck==0.9.1
48
+ Pygments==2.18.0
49
+ python-dateutil==2.9.0.post0
50
+ pytz==2024.1
51
+ PyYAML==6.0.1
52
+ referencing==0.35.1
53
+ regex==2024.5.15
54
+ requests==2.31.0
55
+ requests-oauthlib==2.0.0
56
+ rich==13.7.1
57
+ rpds-py==0.18.1
58
+ rsa==4.9
59
+ safetensors==0.4.3
60
+ scikit-learn==1.4.2
61
+ scipy==1.13.0
62
+ six==1.16.0
63
+ smmap==5.0.1
64
+ streamlit==1.34.0
65
+ tenacity==8.3.0
66
+ tensorboard==2.15.2
67
+ tensorboard-data-server==0.7.2
68
+ tensorflow==2.15.0
69
+ tensorflow-estimator==2.15.0
70
+ tensorflow-intel==2.15.0
71
+ tensorflow-io-gcs-filesystem==0.31.0
72
+ termcolor==2.4.0
73
+ threadpoolctl==3.5.0
74
+ tokenizers==0.19.1
75
+ toml==0.10.2
76
+ toolz==0.12.1
77
+ tornado==6.4
78
+ tqdm==4.66.4
79
+ transformers==4.41.0
80
+ typing_extensions==4.11.0
81
+ tzdata==2024.1
82
+ urllib3==2.2.1
83
+ watchdog==4.0.0
84
+ Werkzeug==3.0.3
85
+ wrapt==1.14.1