{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"none","dataSources":[{"sourceId":7846701,"sourceType":"datasetVersion","datasetId":4600969}],"dockerImageVersionId":30664,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":false}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"code","source":"import os\nimport pandas as pd\nimport numpy as np\n#visualizations\nimport matplotlib.pyplot as plt\nimport seaborn as sns\n%matplotlib inline\n#consistent sized plot \nfrom pylab import rcParams\nrcParams['figure.figsize']=12,5\nrcParams['axes.labelsize']=12\nrcParams['xtick.labelsize']=12\nrcParams['ytick.labelsize']=12\n#handle the warnings in the code\nimport warnings\nwarnings.filterwarnings(action='ignore',category=DeprecationWarning)\nwarnings.filterwarnings(action='ignore',category=FutureWarning)\n#text preprocessing libraries\nimport nltk\nimport nltk\nfrom nltk.corpus import stopwords\nnltk.download('punkt')\nnltk.download('stopwords')\nfrom nltk.tokenize import word_tokenize\nfrom nltk.tokenize import sent_tokenize\nfrom nltk.tokenize import WordPunctTokenizer\nfrom nltk.tokenize import TweetTokenizer\nfrom nltk.stem import WordNetLemmatizer\nfrom nltk.stem import PorterStemmer\n#import texthero\n#import texthero as hero\n#regular expressions\nimport re\n#display pandas dataframe columns \npd.options.display.max_columns = None\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import confusion_matrix\nfrom mlxtend.plotting import plot_confusion_matrix\n\nfrom sklearn import preprocessing\n\nimport tensorflow as tf\nfrom tensorflow.keras.models import Sequential\nfrom keras.utils import to_categorical\nfrom tensorflow.keras.preprocessing.text import Tokenizer\nfrom tensorflow.keras.preprocessing.sequence import pad_sequences\nimport tensorflow as tf\nfrom tensorflow.keras.layers import Embedding, LSTM, Dense, SpatialDropout1D\nfrom tensorflow.keras.callbacks import EarlyStopping\nfrom sklearn.utils.class_weight import compute_class_weight\n\nimport numpy as np\nimport tensorflow as tf\nfrom sklearn.utils.class_weight import compute_class_weight\nfrom tensorflow.keras.callbacks import EarlyStopping\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:38:29.366703Z","iopub.execute_input":"2024-03-16T02:38:29.367141Z","iopub.status.idle":"2024-03-16T02:38:48.566586Z","shell.execute_reply.started":"2024-03-16T02:38:29.367093Z","shell.execute_reply":"2024-03-16T02:38:48.565399Z"},"trusted":true},"execution_count":1,"outputs":[{"name":"stdout","text":"[nltk_data] Downloading package punkt to /usr/share/nltk_data...\n[nltk_data] Package punkt is already up-to-date!\n[nltk_data] Downloading package stopwords to /usr/share/nltk_data...\n[nltk_data] Package stopwords is already up-to-date!\n","output_type":"stream"},{"name":"stderr","text":"2024-03-16 02:38:36.372092: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n2024-03-16 02:38:36.372273: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n2024-03-16 02:38:36.580592: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","output_type":"stream"}]},{"cell_type":"code","source":"pip install keras\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:38:48.568968Z","iopub.execute_input":"2024-03-16T02:38:48.569807Z","iopub.status.idle":"2024-03-16T02:39:04.900700Z","shell.execute_reply.started":"2024-03-16T02:38:48.569763Z","shell.execute_reply":"2024-03-16T02:39:04.899274Z"},"trusted":true},"execution_count":2,"outputs":[{"name":"stderr","text":"/opt/conda/lib/python3.10/pty.py:89: RuntimeWarning: os.fork() was called. os.fork() is incompatible with multithreaded code, and JAX is multithreaded, so this will likely lead to a deadlock.\n pid, fd = os.forkpty()\n","output_type":"stream"},{"name":"stdout","text":"Requirement already satisfied: keras in /opt/conda/lib/python3.10/site-packages (3.0.5)\nRequirement already satisfied: absl-py in /opt/conda/lib/python3.10/site-packages (from keras) (1.4.0)\nRequirement already satisfied: numpy in /opt/conda/lib/python3.10/site-packages (from keras) (1.26.4)\nRequirement already satisfied: rich in /opt/conda/lib/python3.10/site-packages (from keras) (13.7.0)\nRequirement already satisfied: namex in /opt/conda/lib/python3.10/site-packages (from keras) (0.0.7)\nRequirement already satisfied: h5py in /opt/conda/lib/python3.10/site-packages (from keras) (3.10.0)\nRequirement already satisfied: dm-tree in /opt/conda/lib/python3.10/site-packages (from keras) (0.1.8)\nRequirement already satisfied: ml-dtypes in /opt/conda/lib/python3.10/site-packages (from keras) (0.2.0)\nRequirement already satisfied: markdown-it-py>=2.2.0 in /opt/conda/lib/python3.10/site-packages (from rich->keras) (3.0.0)\nRequirement already satisfied: pygments<3.0.0,>=2.13.0 in /opt/conda/lib/python3.10/site-packages (from rich->keras) (2.17.2)\nRequirement already satisfied: mdurl~=0.1 in /opt/conda/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->keras) (0.1.2)\nNote: you may need to restart the kernel to use updated packages.\n","output_type":"stream"}]},{"cell_type":"code","source":"df = pd.read_csv(\"/kaggle/input/traindataset/train.csv\")\ndf\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:04.902643Z","iopub.execute_input":"2024-03-16T02:39:04.903008Z","iopub.status.idle":"2024-03-16T02:39:05.057409Z","shell.execute_reply.started":"2024-03-16T02:39:04.902973Z","shell.execute_reply":"2024-03-16T02:39:05.056204Z"},"trusted":true},"execution_count":3,"outputs":[{"execution_count":3,"output_type":"execute_result","data":{"text/plain":" Unnamed: 0.1 Unnamed: 0 count hate_speech_count \\\n0 0 0 3 0 \n1 1 1 3 0 \n2 2 2 3 0 \n3 3 3 3 0 \n4 4 4 6 0 \n... ... ... ... ... \n24778 24778 24778 3 0 \n24779 24779 24779 3 0 \n24780 24780 24780 3 0 \n24781 24781 24781 6 0 \n24782 24782 24782 3 0 \n\n offensive_language_count neither_count class \\\n0 0 3 2 \n1 3 0 1 \n2 3 0 1 \n3 2 1 1 \n4 6 0 1 \n... ... ... ... \n24778 2 1 1 \n24779 1 2 2 \n24780 3 0 1 \n24781 6 0 1 \n24782 0 3 2 \n\n tweet \n0 (this text should be deleted) !!! RT @mayasolo... \n1 (this text should be deleted) !!!!! RT @mleew1... \n2 (this text should be deleted) !!!!!!! RT @UrKi... \n3 (this text should be deleted) !!!!!!!!! RT @C_... \n4 (this text should be deleted) !!!!!!!!!!!!! RT... \n... ... \n24778 (this text should be deleted) you's a muthaf**... \n24779 (this text should be deleted) you've gone and ... \n24780 (this text should be deleted) young buck wanna... \n24781 (this text should be deleted) youu got wild bi... \n24782 (this text should be deleted) ~~Ruffled | Ntac... \n\n[24783 rows x 8 columns]","text/html":"
\n\n
\n \n \n | \n Unnamed: 0.1 | \n Unnamed: 0 | \n count | \n hate_speech_count | \n offensive_language_count | \n neither_count | \n class | \n tweet | \n
\n \n \n \n 0 | \n 0 | \n 0 | \n 3 | \n 0 | \n 0 | \n 3 | \n 2 | \n (this text should be deleted) !!! RT @mayasolo... | \n
\n \n 1 | \n 1 | \n 1 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n (this text should be deleted) !!!!! RT @mleew1... | \n
\n \n 2 | \n 2 | \n 2 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n (this text should be deleted) !!!!!!! RT @UrKi... | \n
\n \n 3 | \n 3 | \n 3 | \n 3 | \n 0 | \n 2 | \n 1 | \n 1 | \n (this text should be deleted) !!!!!!!!! RT @C_... | \n
\n \n 4 | \n 4 | \n 4 | \n 6 | \n 0 | \n 6 | \n 0 | \n 1 | \n (this text should be deleted) !!!!!!!!!!!!! RT... | \n
\n \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n ... | \n
\n \n 24778 | \n 24778 | \n 24778 | \n 3 | \n 0 | \n 2 | \n 1 | \n 1 | \n (this text should be deleted) you's a muthaf**... | \n
\n \n 24779 | \n 24779 | \n 24779 | \n 3 | \n 0 | \n 1 | \n 2 | \n 2 | \n (this text should be deleted) you've gone and ... | \n
\n \n 24780 | \n 24780 | \n 24780 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n (this text should be deleted) young buck wanna... | \n
\n \n 24781 | \n 24781 | \n 24781 | \n 6 | \n 0 | \n 6 | \n 0 | \n 1 | \n (this text should be deleted) youu got wild bi... | \n
\n \n 24782 | \n 24782 | \n 24782 | \n 3 | \n 0 | \n 0 | \n 3 | \n 2 | \n (this text should be deleted) ~~Ruffled | Ntac... | \n
\n \n
\n
24783 rows × 8 columns
\n
"},"metadata":{}}]},{"cell_type":"code","source":"df.head(10)","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:05.060130Z","iopub.execute_input":"2024-03-16T02:39:05.060527Z","iopub.status.idle":"2024-03-16T02:39:05.074793Z","shell.execute_reply.started":"2024-03-16T02:39:05.060493Z","shell.execute_reply":"2024-03-16T02:39:05.073830Z"},"trusted":true},"execution_count":4,"outputs":[{"execution_count":4,"output_type":"execute_result","data":{"text/plain":" Unnamed: 0.1 Unnamed: 0 count hate_speech_count \\\n0 0 0 3 0 \n1 1 1 3 0 \n2 2 2 3 0 \n3 3 3 3 0 \n4 4 4 6 0 \n5 5 5 3 1 \n6 6 6 3 0 \n7 7 7 3 0 \n8 8 8 3 0 \n9 9 9 3 1 \n\n offensive_language_count neither_count class \\\n0 0 3 2 \n1 3 0 1 \n2 3 0 1 \n3 2 1 1 \n4 6 0 1 \n5 2 0 1 \n6 3 0 1 \n7 3 0 1 \n8 3 0 1 \n9 2 0 1 \n\n tweet \n0 (this text should be deleted) !!! RT @mayasolo... \n1 (this text should be deleted) !!!!! RT @mleew1... \n2 (this text should be deleted) !!!!!!! RT @UrKi... \n3 (this text should be deleted) !!!!!!!!! RT @C_... \n4 (this text should be deleted) !!!!!!!!!!!!! RT... \n5 (this text should be deleted) !!!!!!!!!!!!!!!!... \n6 (this text should be deleted) !!!!!!\"@__Bright... \n7 (this text should be deleted) !!!!“@self... \n8 (this text should be deleted) \" & you migh... \n9 (this text should be deleted) \" @rhythmixx_ :h... ","text/html":"\n\n
\n \n \n | \n Unnamed: 0.1 | \n Unnamed: 0 | \n count | \n hate_speech_count | \n offensive_language_count | \n neither_count | \n class | \n tweet | \n
\n \n \n \n 0 | \n 0 | \n 0 | \n 3 | \n 0 | \n 0 | \n 3 | \n 2 | \n (this text should be deleted) !!! RT @mayasolo... | \n
\n \n 1 | \n 1 | \n 1 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n (this text should be deleted) !!!!! RT @mleew1... | \n
\n \n 2 | \n 2 | \n 2 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n (this text should be deleted) !!!!!!! RT @UrKi... | \n
\n \n 3 | \n 3 | \n 3 | \n 3 | \n 0 | \n 2 | \n 1 | \n 1 | \n (this text should be deleted) !!!!!!!!! RT @C_... | \n
\n \n 4 | \n 4 | \n 4 | \n 6 | \n 0 | \n 6 | \n 0 | \n 1 | \n (this text should be deleted) !!!!!!!!!!!!! RT... | \n
\n \n 5 | \n 5 | \n 5 | \n 3 | \n 1 | \n 2 | \n 0 | \n 1 | \n (this text should be deleted) !!!!!!!!!!!!!!!!... | \n
\n \n 6 | \n 6 | \n 6 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n (this text should be deleted) !!!!!!\"@__Bright... | \n
\n \n 7 | \n 7 | \n 7 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n (this text should be deleted) !!!!“@self... | \n
\n \n 8 | \n 8 | \n 8 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n (this text should be deleted) \" & you migh... | \n
\n \n 9 | \n 9 | \n 9 | \n 3 | \n 1 | \n 2 | \n 0 | \n 1 | \n (this text should be deleted) \" @rhythmixx_ :h... | \n
\n \n
\n
"},"metadata":{}}]},{"cell_type":"code","source":"df.isna().sum()\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:05.076011Z","iopub.execute_input":"2024-03-16T02:39:05.077034Z","iopub.status.idle":"2024-03-16T02:39:05.098169Z","shell.execute_reply.started":"2024-03-16T02:39:05.076997Z","shell.execute_reply":"2024-03-16T02:39:05.097018Z"},"trusted":true},"execution_count":5,"outputs":[{"execution_count":5,"output_type":"execute_result","data":{"text/plain":"Unnamed: 0.1 0\nUnnamed: 0 0\ncount 0\nhate_speech_count 0\noffensive_language_count 0\nneither_count 0\nclass 0\ntweet 0\ndtype: int64"},"metadata":{}}]},{"cell_type":"code","source":"df.describe()\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:05.099512Z","iopub.execute_input":"2024-03-16T02:39:05.100034Z","iopub.status.idle":"2024-03-16T02:39:05.139026Z","shell.execute_reply.started":"2024-03-16T02:39:05.100002Z","shell.execute_reply":"2024-03-16T02:39:05.137877Z"},"trusted":true},"execution_count":6,"outputs":[{"execution_count":6,"output_type":"execute_result","data":{"text/plain":" Unnamed: 0.1 Unnamed: 0 count hate_speech_count \\\ncount 24783.000000 24783.000000 24783.000000 24783.000000 \nmean 12391.000000 12391.000000 3.243473 0.280515 \nstd 7154.380197 7154.380197 0.883060 0.631851 \nmin 0.000000 0.000000 3.000000 0.000000 \n25% 6195.500000 6195.500000 3.000000 0.000000 \n50% 12391.000000 12391.000000 3.000000 0.000000 \n75% 18586.500000 18586.500000 3.000000 0.000000 \nmax 24782.000000 24782.000000 9.000000 7.000000 \n\n offensive_language_count neither_count class \ncount 24783.000000 24783.000000 24783.000000 \nmean 2.413711 0.549247 1.110277 \nstd 1.399459 1.113299 0.462089 \nmin 0.000000 0.000000 0.000000 \n25% 2.000000 0.000000 1.000000 \n50% 3.000000 0.000000 1.000000 \n75% 3.000000 0.000000 1.000000 \nmax 9.000000 9.000000 2.000000 ","text/html":"\n\n
\n \n \n | \n Unnamed: 0.1 | \n Unnamed: 0 | \n count | \n hate_speech_count | \n offensive_language_count | \n neither_count | \n class | \n
\n \n \n \n count | \n 24783.000000 | \n 24783.000000 | \n 24783.000000 | \n 24783.000000 | \n 24783.000000 | \n 24783.000000 | \n 24783.000000 | \n
\n \n mean | \n 12391.000000 | \n 12391.000000 | \n 3.243473 | \n 0.280515 | \n 2.413711 | \n 0.549247 | \n 1.110277 | \n
\n \n std | \n 7154.380197 | \n 7154.380197 | \n 0.883060 | \n 0.631851 | \n 1.399459 | \n 1.113299 | \n 0.462089 | \n
\n \n min | \n 0.000000 | \n 0.000000 | \n 3.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n 0.000000 | \n
\n \n 25% | \n 6195.500000 | \n 6195.500000 | \n 3.000000 | \n 0.000000 | \n 2.000000 | \n 0.000000 | \n 1.000000 | \n
\n \n 50% | \n 12391.000000 | \n 12391.000000 | \n 3.000000 | \n 0.000000 | \n 3.000000 | \n 0.000000 | \n 1.000000 | \n
\n \n 75% | \n 18586.500000 | \n 18586.500000 | \n 3.000000 | \n 0.000000 | \n 3.000000 | \n 0.000000 | \n 1.000000 | \n
\n \n max | \n 24782.000000 | \n 24782.000000 | \n 9.000000 | \n 7.000000 | \n 9.000000 | \n 9.000000 | \n 2.000000 | \n
\n \n
\n
"},"metadata":{}}]},{"cell_type":"code","source":"df['class'].value_counts()\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:05.140320Z","iopub.execute_input":"2024-03-16T02:39:05.140705Z","iopub.status.idle":"2024-03-16T02:39:05.162895Z","shell.execute_reply.started":"2024-03-16T02:39:05.140675Z","shell.execute_reply":"2024-03-16T02:39:05.161740Z"},"trusted":true},"execution_count":7,"outputs":[{"execution_count":7,"output_type":"execute_result","data":{"text/plain":"class\n1 19190\n2 4163\n0 1430\nName: count, dtype: int64"},"metadata":{}}]},{"cell_type":"code","source":"random = np.random.randint(0,len(df))\nprint(random)\ndf.iloc[random]['tweet']","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:05.164338Z","iopub.execute_input":"2024-03-16T02:39:05.164780Z","iopub.status.idle":"2024-03-16T02:39:05.177130Z","shell.execute_reply.started":"2024-03-16T02:39:05.164751Z","shell.execute_reply":"2024-03-16T02:39:05.176307Z"},"trusted":true},"execution_count":8,"outputs":[{"name":"stdout","text":"11283\n","output_type":"stream"},{"execution_count":8,"output_type":"execute_result","data":{"text/plain":"\"(this text should be deleted) I'm going to punch a bitch if my cleats don't come in😠\""},"metadata":{}}]},{"cell_type":"code","source":"dfs = df.copy()\n\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:05.178607Z","iopub.execute_input":"2024-03-16T02:39:05.179182Z","iopub.status.idle":"2024-03-16T02:39:05.185210Z","shell.execute_reply.started":"2024-03-16T02:39:05.179150Z","shell.execute_reply":"2024-03-16T02:39:05.184415Z"},"trusted":true},"execution_count":9,"outputs":[]},{"cell_type":"code","source":"def remove_stopwords(text):\n stop_words = set(stopwords.words('english'))\n words = nltk.word_tokenize(text)\n filtered_words = [word for word in words if word not in stop_words]\n return filtered_words\ndfs['tweet'] = dfs['tweet'].astype(str)\ndfs['tweet'] = dfs['tweet'].apply(remove_stopwords)\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:05.189321Z","iopub.execute_input":"2024-03-16T02:39:05.189918Z","iopub.status.idle":"2024-03-16T02:39:19.302019Z","shell.execute_reply.started":"2024-03-16T02:39:05.189886Z","shell.execute_reply":"2024-03-16T02:39:19.300904Z"},"trusted":true},"execution_count":10,"outputs":[]},{"cell_type":"code","source":"dfs['tweet'].head()\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:19.303197Z","iopub.execute_input":"2024-03-16T02:39:19.303522Z","iopub.status.idle":"2024-03-16T02:39:19.313912Z","shell.execute_reply.started":"2024-03-16T02:39:19.303494Z","shell.execute_reply":"2024-03-16T02:39:19.312752Z"},"trusted":true},"execution_count":11,"outputs":[{"execution_count":11,"output_type":"execute_result","data":{"text/plain":"0 [(, text, deleted, ), !, !, !, RT, @, mayasolo...\n1 [(, text, deleted, ), !, !, !, !, !, RT, @, ml...\n2 [(, text, deleted, ), !, !, !, !, !, !, !, RT,...\n3 [(, text, deleted, ), !, !, !, !, !, !, !, !, ...\n4 [(, text, deleted, ), !, !, !, !, !, !, !, !, ...\nName: tweet, dtype: object"},"metadata":{}}]},{"cell_type":"code","source":"dfs['tweet'].replace(r'@\\w+','',regex=True,inplace=True)\n\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:19.315420Z","iopub.execute_input":"2024-03-16T02:39:19.316319Z","iopub.status.idle":"2024-03-16T02:39:19.357357Z","shell.execute_reply.started":"2024-03-16T02:39:19.316266Z","shell.execute_reply":"2024-03-16T02:39:19.356076Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"dfs['tweet'].head()\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:19.359276Z","iopub.execute_input":"2024-03-16T02:39:19.359996Z","iopub.status.idle":"2024-03-16T02:39:19.370959Z","shell.execute_reply.started":"2024-03-16T02:39:19.359960Z","shell.execute_reply":"2024-03-16T02:39:19.369781Z"},"trusted":true},"execution_count":13,"outputs":[{"execution_count":13,"output_type":"execute_result","data":{"text/plain":"0 [(, text, deleted, ), !, !, !, RT, @, mayasolo...\n1 [(, text, deleted, ), !, !, !, !, !, RT, @, ml...\n2 [(, text, deleted, ), !, !, !, !, !, !, !, RT,...\n3 [(, text, deleted, ), !, !, !, !, !, !, !, !, ...\n4 [(, text, deleted, ), !, !, !, !, !, !, !, !, ...\nName: tweet, dtype: object"},"metadata":{}}]},{"cell_type":"code","source":"dfs['tweet'].replace(r'http\\S+','',regex=True,inplace=True)\n\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:19.372629Z","iopub.execute_input":"2024-03-16T02:39:19.373128Z","iopub.status.idle":"2024-03-16T02:39:19.398068Z","shell.execute_reply.started":"2024-03-16T02:39:19.373098Z","shell.execute_reply":"2024-03-16T02:39:19.397052Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"dfs['tweet'].head()\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:19.399394Z","iopub.execute_input":"2024-03-16T02:39:19.400261Z","iopub.status.idle":"2024-03-16T02:39:19.410380Z","shell.execute_reply.started":"2024-03-16T02:39:19.400223Z","shell.execute_reply":"2024-03-16T02:39:19.409227Z"},"trusted":true},"execution_count":15,"outputs":[{"execution_count":15,"output_type":"execute_result","data":{"text/plain":"0 [(, text, deleted, ), !, !, !, RT, @, mayasolo...\n1 [(, text, deleted, ), !, !, !, !, !, RT, @, ml...\n2 [(, text, deleted, ), !, !, !, !, !, !, !, RT,...\n3 [(, text, deleted, ), !, !, !, !, !, !, !, !, ...\n4 [(, text, deleted, ), !, !, !, !, !, !, !, !, ...\nName: tweet, dtype: object"},"metadata":{}}]},{"cell_type":"code","source":"tokenizer = TweetTokenizer(preserve_case=True)\ndfs['tweet'] = dfs['tweet'].astype(str)\ndfs['tweet'] = dfs['tweet'].apply(tokenizer.tokenize)","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:19.412152Z","iopub.execute_input":"2024-03-16T02:39:19.413013Z","iopub.status.idle":"2024-03-16T02:39:25.994694Z","shell.execute_reply.started":"2024-03-16T02:39:19.412970Z","shell.execute_reply":"2024-03-16T02:39:25.993548Z"},"trusted":true},"execution_count":16,"outputs":[]},{"cell_type":"code","source":"dfs.head(3)\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:25.995962Z","iopub.execute_input":"2024-03-16T02:39:25.996300Z","iopub.status.idle":"2024-03-16T02:39:26.014807Z","shell.execute_reply.started":"2024-03-16T02:39:25.996271Z","shell.execute_reply":"2024-03-16T02:39:26.013541Z"},"trusted":true},"execution_count":17,"outputs":[{"execution_count":17,"output_type":"execute_result","data":{"text/plain":" Unnamed: 0.1 Unnamed: 0 count hate_speech_count \\\n0 0 0 3 0 \n1 1 1 3 0 \n2 2 2 3 0 \n\n offensive_language_count neither_count class \\\n0 0 3 2 \n1 3 0 1 \n2 3 0 1 \n\n tweet \n0 [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ... \n1 [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ... \n2 [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ... ","text/html":"\n\n
\n \n \n | \n Unnamed: 0.1 | \n Unnamed: 0 | \n count | \n hate_speech_count | \n offensive_language_count | \n neither_count | \n class | \n tweet | \n
\n \n \n \n 0 | \n 0 | \n 0 | \n 3 | \n 0 | \n 0 | \n 3 | \n 2 | \n [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ... | \n
\n \n 1 | \n 1 | \n 1 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ... | \n
\n \n 2 | \n 2 | \n 2 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ... | \n
\n \n
\n
"},"metadata":{}}]},{"cell_type":"code","source":"dfs['tweet'].head()\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:26.015948Z","iopub.execute_input":"2024-03-16T02:39:26.016299Z","iopub.status.idle":"2024-03-16T02:39:26.035085Z","shell.execute_reply.started":"2024-03-16T02:39:26.016269Z","shell.execute_reply":"2024-03-16T02:39:26.033838Z"},"trusted":true},"execution_count":18,"outputs":[{"execution_count":18,"output_type":"execute_result","data":{"text/plain":"0 [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ...\n1 [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ...\n2 [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ...\n3 [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ...\n4 [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ...\nName: tweet, dtype: object"},"metadata":{}}]},{"cell_type":"code","source":"def remove_hashsymbols(text):\n '''Function to remove the hashtag symbol from the text'''\n pattern = re.compile(r'#')\n text = ' '.join(text)\n clean_text = re.sub(pattern,'',text)\n return tokenizer.tokenize(clean_text) \n\ndfs['tweet'] = dfs['tweet'].apply(remove_hashsymbols)\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:26.037149Z","iopub.execute_input":"2024-03-16T02:39:26.038245Z","iopub.status.idle":"2024-03-16T02:39:34.943953Z","shell.execute_reply.started":"2024-03-16T02:39:26.038209Z","shell.execute_reply":"2024-03-16T02:39:34.942616Z"},"trusted":true},"execution_count":19,"outputs":[]},{"cell_type":"code","source":"dfs['tweet'].head()\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:34.945463Z","iopub.execute_input":"2024-03-16T02:39:34.945835Z","iopub.status.idle":"2024-03-16T02:39:34.957903Z","shell.execute_reply.started":"2024-03-16T02:39:34.945802Z","shell.execute_reply":"2024-03-16T02:39:34.956666Z"},"trusted":true},"execution_count":20,"outputs":[{"execution_count":20,"output_type":"execute_result","data":{"text/plain":"0 [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ...\n1 [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ...\n2 [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ...\n3 [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ...\n4 [[, ', (, ', ,, ', text, ', ,, ', deleted, ', ...\nName: tweet, dtype: object"},"metadata":{}}]},{"cell_type":"code","source":"def rem_shortwords(text):\n words = word_tokenize(text)\n filtered_words = [word.lower() for word in words if len(word) > 2] # Convert words to lowercase, filter out short words, and exclude specified words\n new_text = ' '.join(filtered_words)\n return new_text\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:34.959572Z","iopub.execute_input":"2024-03-16T02:39:34.960037Z","iopub.status.idle":"2024-03-16T02:39:34.970554Z","shell.execute_reply.started":"2024-03-16T02:39:34.959989Z","shell.execute_reply":"2024-03-16T02:39:34.969264Z"},"trusted":true},"execution_count":21,"outputs":[]},{"cell_type":"code","source":"\ndfs['tweet'] = dfs['tweet'].astype(str)\n\ndfs['tweet'] = dfs['tweet'].apply(rem_shortwords)\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:39:34.972276Z","iopub.execute_input":"2024-03-16T02:39:34.972900Z","iopub.status.idle":"2024-03-16T02:40:13.829104Z","shell.execute_reply.started":"2024-03-16T02:39:34.972853Z","shell.execute_reply":"2024-03-16T02:40:13.828062Z"},"trusted":true},"execution_count":22,"outputs":[]},{"cell_type":"code","source":"dfs.head(3)\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:13.830672Z","iopub.execute_input":"2024-03-16T02:40:13.830987Z","iopub.status.idle":"2024-03-16T02:40:13.843753Z","shell.execute_reply.started":"2024-03-16T02:40:13.830958Z","shell.execute_reply":"2024-03-16T02:40:13.842364Z"},"trusted":true},"execution_count":23,"outputs":[{"execution_count":23,"output_type":"execute_result","data":{"text/plain":" Unnamed: 0.1 Unnamed: 0 count hate_speech_count \\\n0 0 0 3 0 \n1 1 1 3 0 \n2 2 2 3 0 \n\n offensive_language_count neither_count class \\\n0 0 3 2 \n1 3 0 1 \n2 3 0 1 \n\n tweet \n0 'text 'deleted 'rt 'mayasolovely 'as 'woman n'... \n1 'text 'deleted 'rt 'mleew '17 'boy 'dats 'cold... \n2 'text 'deleted 'rt 'urkindofbrand 'dawg 'rt '8... ","text/html":"\n\n
\n \n \n | \n Unnamed: 0.1 | \n Unnamed: 0 | \n count | \n hate_speech_count | \n offensive_language_count | \n neither_count | \n class | \n tweet | \n
\n \n \n \n 0 | \n 0 | \n 0 | \n 3 | \n 0 | \n 0 | \n 3 | \n 2 | \n 'text 'deleted 'rt 'mayasolovely 'as 'woman n'... | \n
\n \n 1 | \n 1 | \n 1 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n 'text 'deleted 'rt 'mleew '17 'boy 'dats 'cold... | \n
\n \n 2 | \n 2 | \n 2 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n 'text 'deleted 'rt 'urkindofbrand 'dawg 'rt '8... | \n
\n \n
\n
"},"metadata":{}}]},{"cell_type":"code","source":"dfs['tweet'] = dfs['tweet'].apply(tokenizer.tokenize)\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:13.845669Z","iopub.execute_input":"2024-03-16T02:40:13.846613Z","iopub.status.idle":"2024-03-16T02:40:16.685598Z","shell.execute_reply.started":"2024-03-16T02:40:13.846555Z","shell.execute_reply":"2024-03-16T02:40:16.684305Z"},"trusted":true},"execution_count":24,"outputs":[]},{"cell_type":"code","source":"dfs['tweet'].head()\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:16.686932Z","iopub.execute_input":"2024-03-16T02:40:16.687307Z","iopub.status.idle":"2024-03-16T02:40:16.699681Z","shell.execute_reply.started":"2024-03-16T02:40:16.687274Z","shell.execute_reply":"2024-03-16T02:40:16.698318Z"},"trusted":true},"execution_count":25,"outputs":[{"execution_count":25,"output_type":"execute_result","data":{"text/plain":"0 [', text, ', deleted, ', rt, ', mayasolovely, ...\n1 [', text, ', deleted, ', rt, ', mleew, ', 17, ...\n2 [', text, ', deleted, ', rt, ', urkindofbrand,...\n3 [', text, ', deleted, ', rt, ', c_g_anderson, ...\n4 [', text, ', deleted, ', rt, ', shenikaroberts...\nName: tweet, dtype: object"},"metadata":{}}]},{"cell_type":"code","source":"def rem_digits(text):\n '''Function to remove the digits from the list of strings'''\n no_digits = []\n for word in text:\n no_digits.append(re.sub(r'\\d','',word))\n return ' '.join(no_digits) ","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:16.701222Z","iopub.execute_input":"2024-03-16T02:40:16.701699Z","iopub.status.idle":"2024-03-16T02:40:16.711798Z","shell.execute_reply.started":"2024-03-16T02:40:16.701658Z","shell.execute_reply":"2024-03-16T02:40:16.710506Z"},"trusted":true},"execution_count":26,"outputs":[]},{"cell_type":"code","source":"dfs['tweet'] = dfs['tweet'].apply(rem_digits)\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:16.713219Z","iopub.execute_input":"2024-03-16T02:40:16.713700Z","iopub.status.idle":"2024-03-16T02:40:17.781822Z","shell.execute_reply.started":"2024-03-16T02:40:16.713645Z","shell.execute_reply":"2024-03-16T02:40:17.780644Z"},"trusted":true},"execution_count":27,"outputs":[]},{"cell_type":"code","source":"dfs['tweet'] = dfs['tweet'].apply(tokenizer.tokenize)\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:17.788576Z","iopub.execute_input":"2024-03-16T02:40:17.788947Z","iopub.status.idle":"2024-03-16T02:40:21.065025Z","shell.execute_reply.started":"2024-03-16T02:40:17.788919Z","shell.execute_reply":"2024-03-16T02:40:21.063912Z"},"trusted":true},"execution_count":28,"outputs":[]},{"cell_type":"code","source":"dfs.head()\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:21.066174Z","iopub.execute_input":"2024-03-16T02:40:21.066513Z","iopub.status.idle":"2024-03-16T02:40:21.083597Z","shell.execute_reply.started":"2024-03-16T02:40:21.066485Z","shell.execute_reply":"2024-03-16T02:40:21.082325Z"},"trusted":true},"execution_count":29,"outputs":[{"execution_count":29,"output_type":"execute_result","data":{"text/plain":" Unnamed: 0.1 Unnamed: 0 count hate_speech_count \\\n0 0 0 3 0 \n1 1 1 3 0 \n2 2 2 3 0 \n3 3 3 3 0 \n4 4 4 6 0 \n\n offensive_language_count neither_count class \\\n0 0 3 2 \n1 3 0 1 \n2 3 0 1 \n3 2 1 1 \n4 6 0 1 \n\n tweet \n0 [', text, ', deleted, ', rt, ', mayasolovely, ... \n1 [', text, ', deleted, ', rt, ', mleew, ', ', b... \n2 [', text, ', deleted, ', rt, ', urkindofbrand,... \n3 [', text, ', deleted, ', rt, ', c_g_anderson, ... \n4 [', text, ', deleted, ', rt, ', shenikaroberts... ","text/html":"\n\n
\n \n \n | \n Unnamed: 0.1 | \n Unnamed: 0 | \n count | \n hate_speech_count | \n offensive_language_count | \n neither_count | \n class | \n tweet | \n
\n \n \n \n 0 | \n 0 | \n 0 | \n 3 | \n 0 | \n 0 | \n 3 | \n 2 | \n [', text, ', deleted, ', rt, ', mayasolovely, ... | \n
\n \n 1 | \n 1 | \n 1 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n [', text, ', deleted, ', rt, ', mleew, ', ', b... | \n
\n \n 2 | \n 2 | \n 2 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n [', text, ', deleted, ', rt, ', urkindofbrand,... | \n
\n \n 3 | \n 3 | \n 3 | \n 3 | \n 0 | \n 2 | \n 1 | \n 1 | \n [', text, ', deleted, ', rt, ', c_g_anderson, ... | \n
\n \n 4 | \n 4 | \n 4 | \n 6 | \n 0 | \n 6 | \n 0 | \n 1 | \n [', text, ', deleted, ', rt, ', shenikaroberts... | \n
\n \n
\n
"},"metadata":{}}]},{"cell_type":"code","source":"def rem_nonalpha(text):\n '''Function to remove the non-alphanumeric characters from the text'''\n text = [word for word in text if word.isalpha()]\n return text","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:21.085363Z","iopub.execute_input":"2024-03-16T02:40:21.085897Z","iopub.status.idle":"2024-03-16T02:40:21.094562Z","shell.execute_reply.started":"2024-03-16T02:40:21.085852Z","shell.execute_reply":"2024-03-16T02:40:21.092783Z"},"trusted":true},"execution_count":30,"outputs":[]},{"cell_type":"code","source":"dfs['tweet'] = dfs['tweet'].apply(rem_nonalpha)\n\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:21.096056Z","iopub.execute_input":"2024-03-16T02:40:21.096528Z","iopub.status.idle":"2024-03-16T02:40:21.199888Z","shell.execute_reply.started":"2024-03-16T02:40:21.096487Z","shell.execute_reply":"2024-03-16T02:40:21.198751Z"},"trusted":true},"execution_count":31,"outputs":[]},{"cell_type":"code","source":"dfs.head()\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:21.201120Z","iopub.execute_input":"2024-03-16T02:40:21.201475Z","iopub.status.idle":"2024-03-16T02:40:21.216254Z","shell.execute_reply.started":"2024-03-16T02:40:21.201446Z","shell.execute_reply":"2024-03-16T02:40:21.215055Z"},"trusted":true},"execution_count":32,"outputs":[{"execution_count":32,"output_type":"execute_result","data":{"text/plain":" Unnamed: 0.1 Unnamed: 0 count hate_speech_count \\\n0 0 0 3 0 \n1 1 1 3 0 \n2 2 2 3 0 \n3 3 3 3 0 \n4 4 4 6 0 \n\n offensive_language_count neither_count class \\\n0 0 3 2 \n1 3 0 1 \n2 3 0 1 \n3 2 1 1 \n4 6 0 1 \n\n tweet \n0 [text, deleted, rt, mayasolovely, as, woman, c... \n1 [text, deleted, rt, mleew, boy, dats, cold, ty... \n2 [text, deleted, rt, urkindofbrand, dawg, rt, s... \n3 [text, deleted, rt, look, like, tranny] \n4 [text, deleted, rt, shenikaroberts, the, shit,... ","text/html":"\n\n
\n \n \n | \n Unnamed: 0.1 | \n Unnamed: 0 | \n count | \n hate_speech_count | \n offensive_language_count | \n neither_count | \n class | \n tweet | \n
\n \n \n \n 0 | \n 0 | \n 0 | \n 3 | \n 0 | \n 0 | \n 3 | \n 2 | \n [text, deleted, rt, mayasolovely, as, woman, c... | \n
\n \n 1 | \n 1 | \n 1 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n [text, deleted, rt, mleew, boy, dats, cold, ty... | \n
\n \n 2 | \n 2 | \n 2 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n [text, deleted, rt, urkindofbrand, dawg, rt, s... | \n
\n \n 3 | \n 3 | \n 3 | \n 3 | \n 0 | \n 2 | \n 1 | \n 1 | \n [text, deleted, rt, look, like, tranny] | \n
\n \n 4 | \n 4 | \n 4 | \n 6 | \n 0 | \n 6 | \n 0 | \n 1 | \n [text, deleted, rt, shenikaroberts, the, shit,... | \n
\n \n
\n
"},"metadata":{}}]},{"cell_type":"code","source":"dfs['tweet'] = dfs['tweet'].apply(lambda x: ' '.join(x))\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:21.217891Z","iopub.execute_input":"2024-03-16T02:40:21.218278Z","iopub.status.idle":"2024-03-16T02:40:21.252498Z","shell.execute_reply.started":"2024-03-16T02:40:21.218247Z","shell.execute_reply":"2024-03-16T02:40:21.251131Z"},"trusted":true},"execution_count":33,"outputs":[]},{"cell_type":"code","source":"random = np.random.randint(0,len(df))\nprint(random)\ndfs.iloc[random]['tweet']","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:52.361713Z","iopub.execute_input":"2024-03-16T02:40:52.362583Z","iopub.status.idle":"2024-03-16T02:40:52.372177Z","shell.execute_reply.started":"2024-03-16T02:40:52.362544Z","shell.execute_reply":"2024-03-16T02:40:52.370810Z"},"trusted":true},"execution_count":40,"outputs":[{"name":"stdout","text":"9041\n","output_type":"stream"},{"execution_count":40,"output_type":"execute_result","data":{"text/plain":"'text deleted facebook birds'"},"metadata":{}}]},{"cell_type":"code","source":"dfs.head(3)\n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:21.266401Z","iopub.execute_input":"2024-03-16T02:40:21.266953Z","iopub.status.idle":"2024-03-16T02:40:21.285546Z","shell.execute_reply.started":"2024-03-16T02:40:21.266919Z","shell.execute_reply":"2024-03-16T02:40:21.284291Z"},"trusted":true},"execution_count":35,"outputs":[{"execution_count":35,"output_type":"execute_result","data":{"text/plain":" Unnamed: 0.1 Unnamed: 0 count hate_speech_count \\\n0 0 0 3 0 \n1 1 1 3 0 \n2 2 2 3 0 \n\n offensive_language_count neither_count class \\\n0 0 3 2 \n1 3 0 1 \n2 3 0 1 \n\n tweet \n0 text deleted rt mayasolovely as woman complain... \n1 text deleted rt mleew boy dats cold tyga dwn b... \n2 text deleted rt urkindofbrand dawg rt sbabylif... ","text/html":"\n\n
\n \n \n | \n Unnamed: 0.1 | \n Unnamed: 0 | \n count | \n hate_speech_count | \n offensive_language_count | \n neither_count | \n class | \n tweet | \n
\n \n \n \n 0 | \n 0 | \n 0 | \n 3 | \n 0 | \n 0 | \n 3 | \n 2 | \n text deleted rt mayasolovely as woman complain... | \n
\n \n 1 | \n 1 | \n 1 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n text deleted rt mleew boy dats cold tyga dwn b... | \n
\n \n 2 | \n 2 | \n 2 | \n 3 | \n 0 | \n 3 | \n 0 | \n 1 | \n text deleted rt urkindofbrand dawg rt sbabylif... | \n
\n \n
\n
"},"metadata":{}}]},{"cell_type":"code","source":"token = Tokenizer(28164)\ntoken.fit_on_texts(dfs['tweet'])\ntext = token.texts_to_sequences(dfs['tweet'])\ntext = pad_sequences(text, maxlen=100)","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:21.287477Z","iopub.execute_input":"2024-03-16T02:40:21.288266Z","iopub.status.idle":"2024-03-16T02:40:22.495929Z","shell.execute_reply.started":"2024-03-16T02:40:21.288202Z","shell.execute_reply":"2024-03-16T02:40:22.494653Z"},"trusted":true},"execution_count":36,"outputs":[]},{"cell_type":"code","source":"X = dfs['tweet']\ny = dfs['class']","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:22.498681Z","iopub.execute_input":"2024-03-16T02:40:22.499568Z","iopub.status.idle":"2024-03-16T02:40:22.505492Z","shell.execute_reply.started":"2024-03-16T02:40:22.499523Z","shell.execute_reply":"2024-03-16T02:40:22.504223Z"},"trusted":true},"execution_count":37,"outputs":[]},{"cell_type":"code","source":"from sklearn.model_selection import train_test_split\n\ntest_size = 0.20 #20% of the data in the \nX_train,X_test,y_train,y_test = train_test_split(text,y,test_size=0.20,random_state=42,stratify=dfs['class'])\nprint(X_train.shape,X_test.shape,y_train.shape,y_test.shape)","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:40:22.506750Z","iopub.execute_input":"2024-03-16T02:40:22.507145Z","iopub.status.idle":"2024-03-16T02:40:22.543431Z","shell.execute_reply.started":"2024-03-16T02:40:22.507118Z","shell.execute_reply":"2024-03-16T02:40:22.542120Z"},"trusted":true},"execution_count":38,"outputs":[{"name":"stdout","text":"(19826, 100) (4957, 100) (19826,) (4957,)\n","output_type":"stream"}]},{"cell_type":"code","source":"from tensorflow.keras.callbacks import EarlyStopping\nmodel = tf.keras.Sequential([\n tf.keras.layers.Embedding(input_dim=24071, output_dim=32),\n tf.keras.layers.LSTM(32, dropout=0.2), # Add dropout to LSTM layer\n tf.keras.layers.Dense(3, activation='softmax')\n])\n\n# Compile the model\nmodel.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])\n\n# Define early stopping\nearly_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True) \n# Stop training if validation loss does not improve after 3 epochs, restore best weights\n\n# Train the model with early stopping\nhistory = model.fit(X_train, y_train, validation_split=0.2, epochs=3, batch_size=16, callbacks=[early_stopping]) \n","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:43:22.950096Z","iopub.execute_input":"2024-03-16T02:43:22.950602Z","iopub.status.idle":"2024-03-16T02:45:28.470200Z","shell.execute_reply.started":"2024-03-16T02:43:22.950566Z","shell.execute_reply":"2024-03-16T02:45:28.469183Z"},"trusted":true},"execution_count":41,"outputs":[{"name":"stdout","text":"Epoch 1/3\n\u001b[1m992/992\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m42s\u001b[0m 39ms/step - accuracy: 0.8117 - loss: 0.5500 - val_accuracy: 0.9065 - val_loss: 0.2622\nEpoch 2/3\n\u001b[1m992/992\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m40s\u001b[0m 39ms/step - accuracy: 0.9238 - loss: 0.2265 - val_accuracy: 0.9130 - val_loss: 0.2464\nEpoch 3/3\n\u001b[1m992/992\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m41s\u001b[0m 39ms/step - accuracy: 0.9528 - loss: 0.1438 - val_accuracy: 0.8994 - val_loss: 0.2943\n","output_type":"stream"}]},{"cell_type":"code","source":"loss, accuracy = model.evaluate(X_test, y_test)\nprint(f'Test Accuracy: {accuracy}')","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:45:34.299707Z","iopub.execute_input":"2024-03-16T02:45:34.300409Z","iopub.status.idle":"2024-03-16T02:45:36.929295Z","shell.execute_reply.started":"2024-03-16T02:45:34.300374Z","shell.execute_reply":"2024-03-16T02:45:36.928069Z"},"trusted":true},"execution_count":42,"outputs":[{"name":"stdout","text":"\u001b[1m155/155\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 11ms/step - accuracy: 0.9075 - loss: 0.2566\nTest Accuracy: 0.900948166847229\n","output_type":"stream"}]},{"cell_type":"code","source":"from sklearn.metrics import classification_report\n\ny_pred = model.predict(X_test)\n\ny_pred_labels = np.argmax(y_pred, axis=1)\n\nprint('Classification Report Testing set')\nprint('\\n')\nprint(classification_report(y_test,y_pred_labels))","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:45:43.012788Z","iopub.execute_input":"2024-03-16T02:45:43.013502Z","iopub.status.idle":"2024-03-16T02:45:45.485938Z","shell.execute_reply.started":"2024-03-16T02:45:43.013458Z","shell.execute_reply":"2024-03-16T02:45:45.484530Z"},"trusted":true},"execution_count":43,"outputs":[{"name":"stdout","text":"\u001b[1m155/155\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 14ms/step\nClassification Report Testing set\n\n\n precision recall f1-score support\n\n 0 0.49 0.26 0.34 286\n 1 0.94 0.95 0.94 3838\n 2 0.82 0.91 0.86 833\n\n accuracy 0.90 4957\n macro avg 0.75 0.70 0.71 4957\nweighted avg 0.89 0.90 0.89 4957\n\n","output_type":"stream"}]},{"cell_type":"code","source":"from sklearn.metrics import classification_report\n\ny_train_pred = model.predict(X_train)\n\ny_train_pred_labels = np.argmax(y_train_pred, axis=1)\nprint('Classification Report training set')\nprint('\\n')\ntrain_report = classification_report(y_train, y_train_pred_labels)\n\nprint(train_report)","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:45:55.927882Z","iopub.execute_input":"2024-03-16T02:45:55.928643Z","iopub.status.idle":"2024-03-16T02:46:03.185520Z","shell.execute_reply.started":"2024-03-16T02:45:55.928607Z","shell.execute_reply":"2024-03-16T02:46:03.184440Z"},"trusted":true},"execution_count":44,"outputs":[{"name":"stdout","text":"\u001b[1m620/620\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 11ms/step\nClassification Report training set\n\n\n precision recall f1-score support\n\n 0 0.75 0.53 0.62 1144\n 1 0.96 0.97 0.97 15352\n 2 0.92 0.96 0.94 3330\n\n accuracy 0.95 19826\n macro avg 0.88 0.82 0.84 19826\nweighted avg 0.94 0.95 0.94 19826\n\n","output_type":"stream"}]},{"cell_type":"code","source":"from sklearn.metrics import confusion_matrix\nimport seaborn as sns\nimport matplotlib.pyplot as plt\n\ny_pred = model.predict(X_test)\n\ny_pred_labels = np.argmax(y_pred, axis=1)\n\ncm = confusion_matrix(y_test, y_pred_labels)\n\nplt.figure(figsize=(8, 6))\nsns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=[0, 1, 2], yticklabels=[0, 1, 2])\nplt.xlabel('Predicted Labels')\nplt.ylabel('True Labels')\nplt.title('Confusion Matrix')\nplt.show()\nprint(cm)","metadata":{"execution":{"iopub.status.busy":"2024-03-16T02:46:09.969169Z","iopub.execute_input":"2024-03-16T02:46:09.969603Z","iopub.status.idle":"2024-03-16T02:46:12.266030Z","shell.execute_reply.started":"2024-03-16T02:46:09.969558Z","shell.execute_reply":"2024-03-16T02:46:12.264923Z"},"trusted":true},"execution_count":45,"outputs":[{"name":"stdout","text":"\u001b[1m155/155\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m2s\u001b[0m 11ms/step\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"