{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.8.16","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"}},"nbformat_minor":4,"nbformat":4,"cells":[
{"cell_type":"markdown","source":"# Multilingual toxic-comment classification with mBERT on TPU\n\nFine-tunes `bert-base-multilingual-uncased` on the Kaggle input `jigsaw-multilingual-toxic-comment-classification`, writes `submission.csv`, and pushes the trained Keras model to the Hugging Face Hub.\n\nThe notebook expects a TPU runtime. Run it top to bottom from a fresh kernel.","metadata":{}},
{"cell_type":"code","source":"# `huggingface_hub` is the package imported below (the PyPI package named `huggingface` is unrelated).\n# %pip (not !pip) installs into the environment of the running kernel.\n%pip install -q \"transformers[sentencepiece]\" huggingface_hub","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"code","source":"import os\nimport warnings\n\n# Must run before the heavy imports so that their import-time warnings are silenced too.\nwarnings.filterwarnings('ignore')\n\nimport matplotlib.pyplot as plt\nimport numpy as np\nimport pandas as pd\nimport tensorflow as tf\nimport transformers\nfrom huggingface_hub import from_pretrained_keras, notebook_login, push_to_hub_keras\nfrom tensorflow.data import Dataset\nfrom tensorflow.keras.layers import Dense, Dropout, GlobalMaxPool1D, Input\nfrom tensorflow.keras.models import Model\nfrom transformers import AutoTokenizer, TFAutoModel","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"code","source":"## Setting up TPUs\ntpu = tf.distribute.cluster_resolver.TPUClusterResolver()\nprint('Running on TPU ', tpu.master())\ntf.config.experimental_connect_to_cluster(tpu)\ntf.tpu.experimental.initialize_tpu_system(tpu)\ntpu_strategy = tf.distribute.TPUStrategy(tpu)\nprint(\"REPLICAS: \", tpu_strategy.num_replicas_in_sync)","metadata":{"execution":{"iopub.status.busy":"2023-05-12T11:50:50.944280Z","iopub.execute_input":"2023-05-12T11:50:50.944798Z","iopub.status.idle":"2023-05-12T11:51:00.695080Z","shell.execute_reply.started":"2023-05-12T11:50:50.944771Z","shell.execute_reply":"2023-05-12T11:51:00.693913Z"},"trusted":true},"execution_count":3,"outputs":[{"name":"stdout","text":"Running on TPU \nINFO:tensorflow:Deallocate tpu buffers before initializing tpu system.\nINFO:tensorflow:Initializing the TPU system: local\nINFO:tensorflow:Finished initializing TPU system.\nINFO:tensorflow:Found TPU system:\nINFO:tensorflow:*** Num TPU Cores: 8\nINFO:tensorflow:*** Num TPU Workers: 1\nINFO:tensorflow:*** Num TPU Cores Per Worker: 8\nINFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)\nINFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:0, TPU, 0, 0)\nINFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:1, TPU, 0, 0)\nINFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:2, TPU, 0, 0)\nINFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:3, TPU, 0, 0)\nINFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:4, TPU, 0, 0)\nINFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:5, TPU, 0, 
0)\nINFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:6, TPU, 0, 0)\nINFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU:7, TPU, 0, 0)\nINFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)\nREPLICAS: 8\n","output_type":"stream"}]},
{"cell_type":"markdown","source":"## Configuration","metadata":{}},
{"cell_type":"code","source":"class Config:\n    \"\"\"Hyper-parameters and data-sampling settings shared by every cell below.\"\"\"\n\n    EPOCHS = 4\n    MODEL = \"bert-base-multilingual-uncased\"\n    BUFFER_SIZE = 1000  # tf.data shuffle buffer, in examples\n    # Global batch size: 16 examples per TPU replica.\n    BATCH_SIZE = 16 * tpu_strategy.num_replicas_in_sync\n    MAX_LEN = 192  # tokens per comment after truncation / padding\n    LEARNING_RATE = 2e-5\n    WEIGHT_DECAY = 1e-6\n    RANDOM_STATE = 42\n    TOXIC_THRESHOLD = 0.5  # train2 toxicity values above this are labelled 1\n    NONTOXIC_SAMPLE_SIZE = 200_000  # non-toxic train2 rows kept when down-sampling\n\n\n# Seed Python, NumPy and TensorFlow together so sampling and shuffling are repeatable.\ntf.keras.utils.set_random_seed(Config.RANDOM_STATE)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"markdown","source":"## Load data","metadata":{}},
{"cell_type":"code","source":"input_dir = \"/kaggle/input/jigsaw-multilingual-toxic-comment-classification\"\ntrain1 = pd.read_csv(os.path.join(input_dir, \"jigsaw-toxic-comment-train.csv\"))\ntrain2 = pd.read_csv(os.path.join(input_dir, \"jigsaw-unintended-bias-train.csv\"))\nval = pd.read_csv(os.path.join(input_dir,\"validation.csv\"))\ntest = pd.read_csv(os.path.join(input_dir,\"test.csv\"))","metadata":{"execution":{"iopub.status.busy":"2023-05-12T11:51:00.704483Z","iopub.execute_input":"2023-05-12T11:51:00.704815Z","iopub.status.idle":"2023-05-12T11:51:27.504621Z","shell.execute_reply.started":"2023-05-12T11:51:00.704792Z","shell.execute_reply":"2023-05-12T11:51:27.503561Z"},"trusted":true},"execution_count":5,"outputs":[]},
{"cell_type":"markdown","source":"## Prepare the training, validation and test frames","metadata":{}},
{"cell_type":"code","source":"COLUMNS = [\"comment_text\", \"toxic\"]\n\n# Select by name, not position: re-running the cell is then harmless, and a changed CSV\n# layout raises a KeyError instead of silently picking the wrong columns.\ntrain1 = train1[COLUMNS]\n# Binarise the train2 labels at the configured threshold.\ntrain2 = train2[COLUMNS].assign(\n    toxic=lambda frame: (frame[\"toxic\"] > Config.TOXIC_THRESHOLD).astype(int)\n)\nval = val[COLUMNS]\n# The test file names its text column `content`; align it with the other frames.\ntest = test.rename(columns={\"content\": \"comment_text\"})\n# Independent copy, so adding the prediction column later never writes into a slice of `test`.\nsub = test[[\"id\"]].copy()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"code","source":"train = (\n    pd.concat([\n        train1,\n        train2.query(\"toxic==1\"),\n        train2.query(\"toxic==0\").sample(n=Config.NONTOXIC_SAMPLE_SIZE, random_state=Config.RANDOM_STATE),\n    ])\n    .dropna()\n    # The three parts are stacked one after another, and the input pipeline only shuffles\n    # within Config.BUFFER_SIZE rows, so mix the rows once here.\n    .sample(frac=1, random_state=Config.RANDOM_STATE)\n    .reset_index(drop=True)\n)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"code","source":"train.shape","metadata":{"execution":{"iopub.status.busy":"2023-05-12T11:51:27.974748Z","iopub.execute_input":"2023-05-12T11:51:27.975079Z","iopub.status.idle":"2023-05-12T11:51:27.983728Z","shell.execute_reply.started":"2023-05-12T11:51:27.975052Z","shell.execute_reply":"2023-05-12T11:51:27.982961Z"},"trusted":true},"execution_count":8,"outputs":[{"execution_count":8,"output_type":"execute_result","data":{"text/plain":"(535775, 2)"},"metadata":{}}]},
{"cell_type":"code","source":"def clean_text(text: pd.Series) -> pd.Series:\n    \"\"\"Return `text` with newlines replaced by spaces and surrounding whitespace stripped.\"\"\"\n    return text.str.replace(\"\\n\", \" \", regex=False).str.strip()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"code","source":"train = train.assign(comment_text=clean_text(train[\"comment_text\"]))\nval = val.assign(comment_text=clean_text(val[\"comment_text\"]))\ntest = test.assign(comment_text=clean_text(test[\"comment_text\"]))","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"code","source":"seq_len = [len(i.split()) for i in train.comment_text]\n\npd.Series(seq_len).hist(bins = 30)\nprint(np.mean(seq_len))\nprint(max(seq_len))","metadata":{"execution":{"iopub.status.busy":"2023-05-12T11:51:29.493564Z","iopub.execute_input":"2023-05-12T11:51:29.493937Z","iopub.status.idle":"2023-05-12T11:51:32.280356Z","shell.execute_reply.started":"2023-05-12T11:51:29.493911Z","shell.execute_reply":"2023-05-12T11:51:32.279438Z"},"trusted":true},"execution_count":11,"outputs":[{"name":"stdout","text":"56.28243572395129\n2321\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"","image/png":""},"metadata":{}}]},
{"cell_type":"markdown","source":"### Tokenization","metadata":{}},
{"cell_type":"code","source":"tokenizer = AutoTokenizer.from_pretrained(Config.MODEL)","metadata":{"_kg_hide-output":true,"execution":{"iopub.status.busy":"2023-05-12T11:51:32.281478Z","iopub.execute_input":"2023-05-12T11:51:32.281773Z","iopub.status.idle":"2023-05-12T11:51:33.112847Z","shell.execute_reply.started":"2023-05-12T11:51:32.281748Z","shell.execute_reply":"2023-05-12T11:51:33.111873Z"},"trusted":true},"execution_count":12,"outputs":[{"name":"stderr","text":"Downloading (…)okenizer_config.json: 100%|██████████| 28.0/28.0 [00:00<00:00, 10.1kB/s]\nDownloading (…)lve/main/config.json: 100%|██████████| 625/625 [00:00<00:00, 382kB/s]\nDownloading (…)solve/main/vocab.txt: 100%|██████████| 872k/872k [00:00<00:00, 13.9MB/s]\nDownloading (…)/main/tokenizer.json: 100%|██████████| 1.72M/1.72M [00:00<00:00, 97.0MB/s]\n","output_type":"stream"}]},
{"cell_type":"code","source":"def encoder(text_data, tokenizer=tokenizer, max_len=Config.MAX_LEN):\n    \"\"\"Tokenize the `comment_text` column of `text_data` into fixed-length TF tensors.\n\n    Args:\n        text_data: DataFrame with a `comment_text` column.\n        tokenizer: tokenizer to call; defaults to the notebook-level `tokenizer`.\n        max_len: every comment is truncated or padded to exactly this many tokens.\n\n    Returns:\n        The tokenizer output as TensorFlow tensors; token type ids are not requested.\n    \"\"\"\n    comments = text_data[\"comment_text\"].tolist()\n    return tokenizer(\n        comments,\n        max_length=max_len,\n        truncation=True,\n        padding=\"max_length\",\n        add_special_tokens=True,\n        return_tensors=\"tf\",\n        return_token_type_ids=False,\n    )","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"code","source":"encoded_train = encoder(text_data = train)\nencoded_val = encoder(text_data = val)\nencoded_test = encoder(text_data = 
test)","metadata":{"execution":{"iopub.status.busy":"2023-05-12T11:51:33.121147Z","iopub.execute_input":"2023-05-12T11:51:33.121426Z","iopub.status.idle":"2023-05-12T11:52:16.634801Z","shell.execute_reply.started":"2023-05-12T11:51:33.121402Z","shell.execute_reply":"2023-05-12T11:52:16.633568Z"},"trusted":true},"execution_count":14,"outputs":[]},
{"cell_type":"markdown","source":"### Input pipelines","metadata":{}},
{"cell_type":"code","source":"# Training stream: shuffle, then repeat forever (epoch length is set by `steps_per_epoch` in fit).\n# Shuffling before repeat keeps examples of different epochs from being mixed together.\n# NOTE: Config.BUFFER_SIZE is far smaller than the training set, so this only mixes nearby\n# rows; the rows of `train` should already be in random order at this point.\ntrain_dataset = (\n    tf.data.Dataset.from_tensor_slices((dict(encoded_train), train[\"toxic\"]))\n    .shuffle(Config.BUFFER_SIZE)\n    .repeat()\n    .batch(Config.BATCH_SIZE)\n    .prefetch(tf.data.AUTOTUNE)\n)\n\nval_dataset = (\n    tf.data.Dataset.from_tensor_slices((dict(encoded_val), val[\"toxic\"]))\n    .batch(Config.BATCH_SIZE)\n    .prefetch(tf.data.AUTOTUNE)\n)\n\n# Test data has no labels, so the dataset yields the feature dict only.\ntest_dataset = tf.data.Dataset.from_tensor_slices(dict(encoded_test)).batch(Config.BATCH_SIZE)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"markdown","source":"## Model","metadata":{}},
{"cell_type":"code","source":"def model_builder(transformer, max_len=Config.MAX_LEN):\n    \"\"\"Build and compile the toxicity classifier on top of a BERT encoder.\n\n    Architecture: BERT token states -> max-pool over the sequence axis ->\n    Dense(768, relu) -> Dropout(0.1) -> Dense(384, relu) -> Dense(1, sigmoid).\n\n    Args:\n        transformer: model returned by `TFAutoModel.from_pretrained`; its `.bert`\n            main layer is used as the encoder and is fine-tuned.\n        max_len: fixed token length of both inputs.\n\n    Returns:\n        A compiled `tf.keras.Model` that takes `input_ids` and `attention_mask`\n        and outputs one sigmoid score per example.\n    \"\"\"\n    input_ids = Input(shape=(max_len,), dtype=tf.int32, name=\"input_ids\")\n    masks = Input(shape=(max_len,), dtype=tf.int32, name=\"attention_mask\")\n\n    # Element [0] of the encoder output is the per-token hidden state.\n    token_states = transformer.bert(input_ids, attention_mask=masks)[0]\n\n    # NOTE(review): the max-pool runs over every position, padding included - confirm\n    # that this is intended.\n    pooled = GlobalMaxPool1D()(token_states)\n    hidden = Dense(768, activation=\"relu\")(pooled)\n    hidden = Dropout(0.1)(hidden)\n    hidden = Dense(384, activation=\"relu\")(hidden)\n    output = Dense(1, activation=\"sigmoid\")(hidden)\n\n    model = Model(inputs=[input_ids, masks], outputs=output)\n    # Fine-tune the encoder as well. Set the flag on the layer object itself rather than\n    # on model.layers[2], which breaks as soon as a layer is added or reordered.\n    transformer.bert.trainable = True\n\n    model.compile(\n        optimizer=tf.keras.optimizers.Adam(\n            learning_rate=Config.LEARNING_RATE,\n            weight_decay=Config.WEIGHT_DECAY,\n        ),\n        loss=tf.keras.losses.BinaryCrossentropy(),\n        metrics=[tf.keras.metrics.AUC()],\n    )\n    return model","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"code","source":"with tpu_strategy.scope():\n    transformer = TFAutoModel.from_pretrained(Config.MODEL)\n    model = model_builder(transformer=transformer)","metadata":{"execution":{"iopub.status.busy":"2023-05-12T11:52:16.680310Z","iopub.execute_input":"2023-05-12T11:52:16.680659Z","iopub.status.idle":"2023-05-12T11:52:51.834986Z","shell.execute_reply.started":"2023-05-12T11:52:16.680636Z","shell.execute_reply":"2023-05-12T11:52:51.833917Z"},"trusted":true},"execution_count":17,"outputs":[{"name":"stderr","text":"Downloading tf_model.h5: 100%|██████████| 999M/999M [00:10<00:00, 99.5MB/s] \nSome layers from the model checkpoint at bert-base-multilingual-uncased were not used when initializing TFBertModel: ['mlm___cls', 'nsp___cls']\n- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. 
initializing a BertForSequenceClassification model from a BertForPreTraining model).\n- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\nAll the layers of TFBertModel were initialized from the model checkpoint at bert-base-multilingual-uncased.\nIf your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.\n","output_type":"stream"}]},
{"cell_type":"code","source":"model.summary()","metadata":{"execution":{"iopub.status.busy":"2023-05-12T11:52:51.836335Z","iopub.execute_input":"2023-05-12T11:52:51.836733Z","iopub.status.idle":"2023-05-12T11:52:51.881506Z","shell.execute_reply.started":"2023-05-12T11:52:51.836705Z","shell.execute_reply":"2023-05-12T11:52:51.880546Z"},"trusted":true},"execution_count":18,"outputs":[{"name":"stdout","text":"Model: \"model\"\n__________________________________________________________________________________________________\n Layer (type) Output Shape Param # Connected to \n==================================================================================================\n input_ids (InputLayer) [(None, 192)] 0 [] \n \n attention_mask (InputLayer) [(None, 192)] 0 [] \n \n bert (TFBertMainLayer) TFBaseModelOutputWi 167356416 ['input_ids[0][0]', \n thPoolingAndCrossAt 'attention_mask[0][0]'] \n tentions(last_hidde \n n_state=(None, 192, \n 768), \n pooler_output=(Non \n e, 768), \n past_key_values=No \n ne, hidden_states=N \n one, attentions=Non \n e, cross_attentions \n =None) \n \n global_max_pooling1d (GlobalMa (None, 768) 0 ['bert[0][0]'] \n xPooling1D) \n \n dense (Dense) (None, 768) 590592 ['global_max_pooling1d[0][0]'] \n \n dropout_37 (Dropout) (None, 768) 0 ['dense[0][0]'] \n \n dense_1 (Dense) (None, 384) 295296 ['dropout_37[0][0]'] \n \n dense_2 (Dense) (None, 1) 385 ['dense_1[0][0]'] \n \n==================================================================================================\nTotal params: 168,242,689\nTrainable params: 168,242,689\nNon-trainable params: 0\n__________________________________________________________________________________________________\n","output_type":"stream"}]},
{"cell_type":"markdown","source":"## Training\n\n**Note:** the log stored under the next cell shows `Epoch 1/2`, i.e. it comes from a run with 2 epochs, while the cell passes `epochs=Config.EPOCHS`. Re-run the notebook to refresh it.","metadata":{}},
{"cell_type":"code","source":"train_steps_per_epoch = train.shape[0]//Config.BATCH_SIZE\n\nhistory=model.fit(train_dataset,\n                  validation_data=val_dataset,\n                  steps_per_epoch=train_steps_per_epoch,\n                  epochs=Config.EPOCHS)","metadata":{"execution":{"iopub.status.busy":"2023-05-12T11:52:51.882653Z","iopub.execute_input":"2023-05-12T11:52:51.883060Z","iopub.status.idle":"2023-05-12T12:13:12.713842Z","shell.execute_reply.started":"2023-05-12T11:52:51.883032Z","shell.execute_reply":"2023-05-12T12:13:12.712319Z"},"trusted":true},"execution_count":19,"outputs":[{"name":"stdout","text":"Epoch 1/2\nWARNING:tensorflow:Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n","output_type":"stream"},{"name":"stderr","text":"WARNING:tensorflow:Gradients do not exist for variables ['tf_bert_model/bert/pooler/dense/kernel:0', 'tf_bert_model/bert/pooler/dense/bias:0'] when minimizing the loss. If you're using `model.compile()`, did you forget to provide a `loss` argument?\n","output_type":"stream"},{"name":"stdout","text":"4185/4185 [==============================] - ETA: 0s - loss: 0.0486 - auc: 0.9973","output_type":"stream"},{"name":"stderr","text":"2023-05-12 12:03:44.473468: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.\n2023-05-12 12:03:44.751296: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] model_pruner failed: INVALID_ARGUMENT: Graph does not contain terminal node Add/ReadVariableOp.\n","output_type":"stream"},{"name":"stdout","text":"4185/4185 [==============================] - 666s 136ms/step - loss: 0.0486 - auc: 0.9973 - val_loss: 0.6711 - val_auc: 0.7589\nEpoch 2/2\n4185/4185 [==============================] - 553s 132ms/step - loss: 0.0420 - auc: 0.9980 - val_loss: 0.6677 - val_auc: 0.7927\n","output_type":"stream"}]},
{"cell_type":"markdown","source":"### Held-out validation metrics\n\nThis cell runs **before** the model is trained on any validation rows. The output previously stored here (AUC 0.99) was produced after the validation fine-tuning step below (its execution count was 21, the fine-tuning cell's was 20), so it measured data the model had just been trained on. It has been cleared; re-run to get the held-out figure.","metadata":{}},
{"cell_type":"code","source":"model.evaluate(val_dataset)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"markdown","source":"### Fine-tune on the validation set\n\nAfter this step `val_dataset` is training data; metrics computed on it are no longer held-out.","metadata":{}},
{"cell_type":"code","source":"val_steps_per_epoch = val.shape[0]//Config.BATCH_SIZE\nval_history=model.fit(val_dataset.repeat(),\n                      steps_per_epoch=val_steps_per_epoch,\n                      epochs=2)","metadata":{"execution":{"iopub.status.busy":"2023-05-12T12:13:29.464786Z","iopub.execute_input":"2023-05-12T12:13:29.465239Z","iopub.status.idle":"2023-05-12T12:14:32.640293Z","shell.execute_reply.started":"2023-05-12T12:13:29.465206Z","shell.execute_reply":"2023-05-12T12:14:32.639052Z"},"trusted":true},"execution_count":20,"outputs":[{"name":"stdout","text":"Epoch 1/2\n62/62 [==============================] - 8s 131ms/step - loss: 0.3363 - auc: 0.8208\nEpoch 2/2\n62/62 [==============================] - 54s 131ms/step - loss: 0.1986 - auc: 0.9492\n","output_type":"stream"}]},
{"cell_type":"markdown","source":"## Predict and save","metadata":{}},
{"cell_type":"code","source":"preds = model.predict(test_dataset)\n# The model has a single output unit, so predictions have shape (n, 1); flatten them\n# so the new column is a plain 1-D vector.\nsub['toxic'] = preds.ravel()\nsub.to_csv(\"submission.csv\",index=False)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"code","source":"model.save(\"mbert-fine-tuned-2-hiddenstates\")","metadata":{"execution":{"iopub.status.busy":"2023-05-12T10:44:30.231587Z","iopub.execute_input":"2023-05-12T10:44:30.231921Z"},"trusted":true},"execution_count":null,"outputs":[{"name":"stderr","text":"WARNING:absl:Found untraced functions such as _update_step_xla, encoder_layer_call_fn, 
encoder_layer_call_and_return_conditional_losses, pooler_layer_call_fn, pooler_layer_call_and_return_conditional_losses while saving (showing 5 of 829). These functions will not be directly callable after loading.\n","output_type":"stream"}]},
{"cell_type":"markdown","source":"### Pushing Model to Hugging Face\n\nAuthentication reads the access token from the `HF_TOKEN` environment variable; set it outside the notebook (for example through the platform's secret store). Never paste a token into a cell.\n\n**Security note:** an earlier version of this notebook contained a hardcoded access token. That token must be treated as leaked and revoked in the Hugging Face account settings.\n\n**Naming note:** the Hub repository is called `Multilingual-Toxic-Comment-Roberta`, but the model trained above is built from `Config.MODEL`. The name is kept unchanged so the existing repository keeps working.","metadata":{}},
{"cell_type":"code","source":"from huggingface_hub import login\n\n# Read the token from the environment so the secret never ends up in the notebook file.\n# A missing variable raises KeyError('HF_TOKEN') instead of attempting an anonymous push.\nlogin(token=os.environ[\"HF_TOKEN\"])","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"code","source":"push_to_hub_keras(model, 'Multilingual-Toxic-Comment-Roberta')","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"markdown","source":"### Loading model from Hub","metadata":{}},
{"cell_type":"code","source":"hub_model = from_pretrained_keras('shivansh-ka/Multilingual-Toxic-Comment-Roberta')","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"code","source":"hub_model.summary()","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"code","source":"preds = hub_model.predict(test_dataset)","metadata":{"trusted":true},"execution_count":null,"outputs":[]},
{"cell_type":"code","source":"# Flatten the model output to 1-D before storing it as a column.\nsub['toxic'] = preds.ravel()\nsub.to_csv(\"submission.csv\",index=False)","metadata":{"trusted":true},"execution_count":null,"outputs":[]}]}