Ubuntu committed on
Commit
ed39e1a
1 Parent(s): db53170

finetuned again, converted logits to individual probabilities

Browse files
Files changed (28) hide show
  1. finetuned_entity_categorical_classification/checkpoint-1606/added_tokens.json +7 -0
  2. finetuned_entity_categorical_classification/checkpoint-1606/config.json +83 -0
  3. finetuned_entity_categorical_classification/checkpoint-1606/optimizer.pt +3 -0
  4. finetuned_entity_categorical_classification/checkpoint-1606/pytorch_model.bin +3 -0
  5. finetuned_entity_categorical_classification/checkpoint-1606/rng_state.pth +0 -0
  6. finetuned_entity_categorical_classification/checkpoint-1606/scheduler.pt +3 -0
  7. finetuned_entity_categorical_classification/checkpoint-1606/special_tokens_map.json +7 -0
  8. finetuned_entity_categorical_classification/checkpoint-1606/tokenizer.json +0 -0
  9. finetuned_entity_categorical_classification/checkpoint-1606/tokenizer_config.json +56 -0
  10. finetuned_entity_categorical_classification/checkpoint-1606/trainer_state.json +46 -0
  11. finetuned_entity_categorical_classification/checkpoint-1606/training_args.bin +3 -0
  12. finetuned_entity_categorical_classification/checkpoint-1606/vocab.txt +0 -0
  13. finetuned_entity_categorical_classification/checkpoint-3212/added_tokens.json +7 -0
  14. finetuned_entity_categorical_classification/checkpoint-3212/config.json +83 -0
  15. finetuned_entity_categorical_classification/checkpoint-3212/optimizer.pt +3 -0
  16. finetuned_entity_categorical_classification/checkpoint-3212/pytorch_model.bin +3 -0
  17. finetuned_entity_categorical_classification/checkpoint-3212/rng_state.pth +0 -0
  18. finetuned_entity_categorical_classification/checkpoint-3212/scheduler.pt +3 -0
  19. finetuned_entity_categorical_classification/checkpoint-3212/special_tokens_map.json +7 -0
  20. finetuned_entity_categorical_classification/checkpoint-3212/tokenizer.json +0 -0
  21. finetuned_entity_categorical_classification/checkpoint-3212/tokenizer_config.json +56 -0
  22. finetuned_entity_categorical_classification/checkpoint-3212/trainer_state.json +73 -0
  23. finetuned_entity_categorical_classification/checkpoint-3212/training_args.bin +3 -0
  24. finetuned_entity_categorical_classification/checkpoint-3212/vocab.txt +0 -0
  25. finetuned_entity_categorical_classification/runs/Oct12_07-34-46_ip-172-31-95-165/events.out.tfevents.1697096087.ip-172-31-95-165.123522.0 +0 -0
  26. research/09_fine_tuning_for_datacategories.ipynb +187 -187
  27. research/09_inference.html +510 -223
  28. research/09_inference.ipynb +802 -212
finetuned_entity_categorical_classification/checkpoint-1606/added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "[CLS]": 101,
3
+ "[MASK]": 103,
4
+ "[PAD]": 0,
5
+ "[SEP]": 102,
6
+ "[UNK]": 100
7
+ }
finetuned_entity_categorical_classification/checkpoint-1606/config.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "finetuned_entity_categorical_classification/checkpoint-23640",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "Hobbies_and_Leisure",
13
+ "1": "News",
14
+ "2": "Science",
15
+ "3": "Autos_and_Vehicles",
16
+ "4": "Health",
17
+ "5": "Pets_and_Animals",
18
+ "6": "Adult",
19
+ "7": "Computers_and_Electronics",
20
+ "8": "Online Communities",
21
+ "9": "Beauty_and_Fitness",
22
+ "10": "People_and_Society",
23
+ "11": "Business_and_Industrial",
24
+ "12": "Reference",
25
+ "13": "Shopping",
26
+ "14": "Travel_and_Transportation",
27
+ "15": "Food_and_Drink",
28
+ "16": "Law_and_Government",
29
+ "17": "Books_and_Literature",
30
+ "18": "Finance",
31
+ "19": "Games",
32
+ "20": "Home_and_Garden",
33
+ "21": "Jobs_and_Education",
34
+ "22": "Arts_and_Entertainment",
35
+ "23": "Sensitive Subjects",
36
+ "24": "Real Estate",
37
+ "25": "Internet_and_Telecom",
38
+ "26": "Sports"
39
+ },
40
+ "initializer_range": 0.02,
41
+ "label2id": {
42
+ "Adult": 6,
43
+ "Arts_and_Entertainment": 22,
44
+ "Autos_and_Vehicles": 3,
45
+ "Beauty_and_Fitness": 9,
46
+ "Books_and_Literature": 17,
47
+ "Business_and_Industrial": 11,
48
+ "Computers_and_Electronics": 7,
49
+ "Finance": 18,
50
+ "Food_and_Drink": 15,
51
+ "Games": 19,
52
+ "Health": 4,
53
+ "Hobbies_and_Leisure": 0,
54
+ "Home_and_Garden": 20,
55
+ "Internet_and_Telecom": 25,
56
+ "Jobs_and_Education": 21,
57
+ "Law_and_Government": 16,
58
+ "News": 1,
59
+ "Online Communities": 8,
60
+ "People_and_Society": 10,
61
+ "Pets_and_Animals": 5,
62
+ "Real Estate": 24,
63
+ "Reference": 12,
64
+ "Science": 2,
65
+ "Sensitive Subjects": 23,
66
+ "Shopping": 13,
67
+ "Sports": 26,
68
+ "Travel_and_Transportation": 14
69
+ },
70
+ "max_position_embeddings": 512,
71
+ "model_type": "distilbert",
72
+ "n_heads": 12,
73
+ "n_layers": 6,
74
+ "pad_token_id": 0,
75
+ "problem_type": "single_label_classification",
76
+ "qa_dropout": 0.1,
77
+ "seq_classif_dropout": 0.2,
78
+ "sinusoidal_pos_embds": false,
79
+ "tie_weights_": true,
80
+ "torch_dtype": "float32",
81
+ "transformers_version": "4.34.0",
82
+ "vocab_size": 30522
83
+ }
finetuned_entity_categorical_classification/checkpoint-1606/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19acedc3d2479a0b702fa99dc2fbb3d136d6fc0d8c4d7f60c4a7801790fa7f78
3
+ size 535881018
finetuned_entity_categorical_classification/checkpoint-1606/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:412cefb52413fe32419f820fbf788fcb8f36b7ec706fa6533ae06eb5fce7a85d
3
+ size 267932842
finetuned_entity_categorical_classification/checkpoint-1606/rng_state.pth ADDED
Binary file (14.2 kB). View file
 
finetuned_entity_categorical_classification/checkpoint-1606/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8dfaaebd8d17209d079d1f5be496af774586b0a5360dfbd5dfc8c1773baeed3a
3
+ size 1064
finetuned_entity_categorical_classification/checkpoint-1606/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
finetuned_entity_categorical_classification/checkpoint-1606/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
finetuned_entity_categorical_classification/checkpoint-1606/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "additional_special_tokens": [],
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
finetuned_entity_categorical_classification/checkpoint-1606/trainer_state.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.11884114891290665,
3
+ "best_model_checkpoint": "finetuned_entity_categorical_classification/checkpoint-1606",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1606,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.31,
13
+ "learning_rate": 1.6886674968866752e-05,
14
+ "loss": 1.0674,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.62,
19
+ "learning_rate": 1.37733499377335e-05,
20
+ "loss": 0.1399,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.93,
25
+ "learning_rate": 1.066002490660025e-05,
26
+ "loss": 0.1337,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 1.0,
31
+ "eval_accuracy": 0.9736842105263158,
32
+ "eval_loss": 0.11884114891290665,
33
+ "eval_runtime": 2.2458,
34
+ "eval_samples_per_second": 2859.611,
35
+ "eval_steps_per_second": 179.004,
36
+ "step": 1606
37
+ }
38
+ ],
39
+ "logging_steps": 500,
40
+ "max_steps": 3212,
41
+ "num_train_epochs": 2,
42
+ "save_steps": 500,
43
+ "total_flos": 101362033000800.0,
44
+ "trial_name": null,
45
+ "trial_params": null
46
+ }
finetuned_entity_categorical_classification/checkpoint-1606/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0adabf2f73371d63a132d200cc272c0595f2b10bd579056ad508da7aa97ef66e
3
+ size 4600
finetuned_entity_categorical_classification/checkpoint-1606/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
finetuned_entity_categorical_classification/checkpoint-3212/added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "[CLS]": 101,
3
+ "[MASK]": 103,
4
+ "[PAD]": 0,
5
+ "[SEP]": 102,
6
+ "[UNK]": 100
7
+ }
finetuned_entity_categorical_classification/checkpoint-3212/config.json ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "finetuned_entity_categorical_classification/checkpoint-23640",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "id2label": {
12
+ "0": "Hobbies_and_Leisure",
13
+ "1": "News",
14
+ "2": "Science",
15
+ "3": "Autos_and_Vehicles",
16
+ "4": "Health",
17
+ "5": "Pets_and_Animals",
18
+ "6": "Adult",
19
+ "7": "Computers_and_Electronics",
20
+ "8": "Online Communities",
21
+ "9": "Beauty_and_Fitness",
22
+ "10": "People_and_Society",
23
+ "11": "Business_and_Industrial",
24
+ "12": "Reference",
25
+ "13": "Shopping",
26
+ "14": "Travel_and_Transportation",
27
+ "15": "Food_and_Drink",
28
+ "16": "Law_and_Government",
29
+ "17": "Books_and_Literature",
30
+ "18": "Finance",
31
+ "19": "Games",
32
+ "20": "Home_and_Garden",
33
+ "21": "Jobs_and_Education",
34
+ "22": "Arts_and_Entertainment",
35
+ "23": "Sensitive Subjects",
36
+ "24": "Real Estate",
37
+ "25": "Internet_and_Telecom",
38
+ "26": "Sports"
39
+ },
40
+ "initializer_range": 0.02,
41
+ "label2id": {
42
+ "Adult": 6,
43
+ "Arts_and_Entertainment": 22,
44
+ "Autos_and_Vehicles": 3,
45
+ "Beauty_and_Fitness": 9,
46
+ "Books_and_Literature": 17,
47
+ "Business_and_Industrial": 11,
48
+ "Computers_and_Electronics": 7,
49
+ "Finance": 18,
50
+ "Food_and_Drink": 15,
51
+ "Games": 19,
52
+ "Health": 4,
53
+ "Hobbies_and_Leisure": 0,
54
+ "Home_and_Garden": 20,
55
+ "Internet_and_Telecom": 25,
56
+ "Jobs_and_Education": 21,
57
+ "Law_and_Government": 16,
58
+ "News": 1,
59
+ "Online Communities": 8,
60
+ "People_and_Society": 10,
61
+ "Pets_and_Animals": 5,
62
+ "Real Estate": 24,
63
+ "Reference": 12,
64
+ "Science": 2,
65
+ "Sensitive Subjects": 23,
66
+ "Shopping": 13,
67
+ "Sports": 26,
68
+ "Travel_and_Transportation": 14
69
+ },
70
+ "max_position_embeddings": 512,
71
+ "model_type": "distilbert",
72
+ "n_heads": 12,
73
+ "n_layers": 6,
74
+ "pad_token_id": 0,
75
+ "problem_type": "single_label_classification",
76
+ "qa_dropout": 0.1,
77
+ "seq_classif_dropout": 0.2,
78
+ "sinusoidal_pos_embds": false,
79
+ "tie_weights_": true,
80
+ "torch_dtype": "float32",
81
+ "transformers_version": "4.34.0",
82
+ "vocab_size": 30522
83
+ }
finetuned_entity_categorical_classification/checkpoint-3212/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:949e9674bd8f44b7f3b456d7c8866cf9e7f2f56afe03fc520788d71cc9e5d877
3
+ size 535881018
finetuned_entity_categorical_classification/checkpoint-3212/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1137fbeaa6b73c979f5792601cebf66e0fd9ed02b20d85fd1467fc78cd1e26c
3
+ size 267932842
finetuned_entity_categorical_classification/checkpoint-3212/rng_state.pth ADDED
Binary file (14.2 kB). View file
 
finetuned_entity_categorical_classification/checkpoint-3212/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8915d9bc09464457beb8a4c6765791b388089c2c9de68f8b710b52b7951ae1d9
3
+ size 1064
finetuned_entity_categorical_classification/checkpoint-3212/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
finetuned_entity_categorical_classification/checkpoint-3212/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
finetuned_entity_categorical_classification/checkpoint-3212/tokenizer_config.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "additional_special_tokens": [],
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": true,
48
+ "mask_token": "[MASK]",
49
+ "model_max_length": 512,
50
+ "pad_token": "[PAD]",
51
+ "sep_token": "[SEP]",
52
+ "strip_accents": null,
53
+ "tokenize_chinese_chars": true,
54
+ "tokenizer_class": "DistilBertTokenizer",
55
+ "unk_token": "[UNK]"
56
+ }
finetuned_entity_categorical_classification/checkpoint-3212/trainer_state.json ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.11884114891290665,
3
+ "best_model_checkpoint": "finetuned_entity_categorical_classification/checkpoint-1606",
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 3212,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.31,
13
+ "learning_rate": 1.6886674968866752e-05,
14
+ "loss": 1.0674,
15
+ "step": 500
16
+ },
17
+ {
18
+ "epoch": 0.62,
19
+ "learning_rate": 1.37733499377335e-05,
20
+ "loss": 0.1399,
21
+ "step": 1000
22
+ },
23
+ {
24
+ "epoch": 0.93,
25
+ "learning_rate": 1.066002490660025e-05,
26
+ "loss": 0.1337,
27
+ "step": 1500
28
+ },
29
+ {
30
+ "epoch": 1.0,
31
+ "eval_accuracy": 0.9736842105263158,
32
+ "eval_loss": 0.11884114891290665,
33
+ "eval_runtime": 2.2458,
34
+ "eval_samples_per_second": 2859.611,
35
+ "eval_steps_per_second": 179.004,
36
+ "step": 1606
37
+ },
38
+ {
39
+ "epoch": 1.25,
40
+ "learning_rate": 7.5466998754669995e-06,
41
+ "loss": 0.1071,
42
+ "step": 2000
43
+ },
44
+ {
45
+ "epoch": 1.56,
46
+ "learning_rate": 4.433374844333748e-06,
47
+ "loss": 0.0813,
48
+ "step": 2500
49
+ },
50
+ {
51
+ "epoch": 1.87,
52
+ "learning_rate": 1.3200498132004982e-06,
53
+ "loss": 0.0963,
54
+ "step": 3000
55
+ },
56
+ {
57
+ "epoch": 2.0,
58
+ "eval_accuracy": 0.9732170663344752,
59
+ "eval_loss": 0.12265542149543762,
60
+ "eval_runtime": 2.2396,
61
+ "eval_samples_per_second": 2867.448,
62
+ "eval_steps_per_second": 179.495,
63
+ "step": 3212
64
+ }
65
+ ],
66
+ "logging_steps": 500,
67
+ "max_steps": 3212,
68
+ "num_train_epochs": 2,
69
+ "save_steps": 500,
70
+ "total_flos": 202880405807352.0,
71
+ "trial_name": null,
72
+ "trial_params": null
73
+ }
finetuned_entity_categorical_classification/checkpoint-3212/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0adabf2f73371d63a132d200cc272c0595f2b10bd579056ad508da7aa97ef66e
3
+ size 4600
finetuned_entity_categorical_classification/checkpoint-3212/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
finetuned_entity_categorical_classification/runs/Oct12_07-34-46_ip-172-31-95-165/events.out.tfevents.1697096087.ip-172-31-95-165.123522.0 ADDED
Binary file (7.68 kB). View file
 
research/09_fine_tuning_for_datacategories.ipynb CHANGED
@@ -62,93 +62,93 @@
62
  " </thead>\n",
63
  " <tbody>\n",
64
  " <tr>\n",
65
- " <th>2461</th>\n",
66
- " <td>Business networking tips</td>\n",
67
- " <td>Business_and_Industrial</td>\n",
68
- " <td>12</td>\n",
69
  " </tr>\n",
70
  " <tr>\n",
71
- " <th>10003</th>\n",
72
- " <td>Industrial development and infrastructure proj...</td>\n",
73
- " <td>Business_and_Industrial</td>\n",
74
- " <td>12</td>\n",
75
  " </tr>\n",
76
  " <tr>\n",
77
- " <th>14189</th>\n",
78
- " <td>Music theory and composition discussions</td>\n",
79
- " <td>Online Communities</td>\n",
80
- " <td>20</td>\n",
81
  " </tr>\n",
82
  " <tr>\n",
83
- " <th>19874</th>\n",
84
- " <td>Civil litigation process efficiency impact inf...</td>\n",
85
- " <td>Law_and_Government</td>\n",
86
- " <td>11</td>\n",
87
  " </tr>\n",
88
  " <tr>\n",
89
- " <th>5676</th>\n",
90
- " <td>Human rights violations investigations effecti...</td>\n",
91
- " <td>Law_and_Government</td>\n",
92
  " <td>11</td>\n",
93
  " </tr>\n",
94
  " <tr>\n",
95
- " <th>27151</th>\n",
96
- " <td>Vehicle history apps</td>\n",
97
- " <td>Autos_and_Vehicles</td>\n",
98
- " <td>10</td>\n",
99
  " </tr>\n",
100
  " <tr>\n",
101
- " <th>21837</th>\n",
102
- " <td>Online references</td>\n",
103
- " <td>Reference</td>\n",
104
  " <td>25</td>\n",
105
  " </tr>\n",
106
  " <tr>\n",
107
- " <th>5541</th>\n",
108
- " <td>Gay Movies Gay</td>\n",
109
- " <td>Adult</td>\n",
110
- " <td>4</td>\n",
111
  " </tr>\n",
112
  " <tr>\n",
113
- " <th>10734</th>\n",
114
- " <td>Catfood for senior cats</td>\n",
115
- " <td>Food_and_Drink</td>\n",
116
- " <td>7</td>\n",
117
  " </tr>\n",
118
  " <tr>\n",
119
- " <th>25164</th>\n",
120
- " <td>Internet safety for sports fans</td>\n",
121
- " <td>Internet_and_Telecom</td>\n",
122
- " <td>17</td>\n",
123
  " </tr>\n",
124
  " </tbody>\n",
125
  "</table>\n",
126
  "</div>"
127
  ],
128
  "text/plain": [
129
- " category \\\n",
130
- "2461 Business networking tips \n",
131
- "10003 Industrial development and infrastructure proj... \n",
132
- "14189 Music theory and composition discussions \n",
133
- "19874 Civil litigation process efficiency impact inf... \n",
134
- "5676 Human rights violations investigations effecti... \n",
135
- "27151 Vehicle history apps \n",
136
- "21837 Online references \n",
137
- "5541 Gay Movies Gay \n",
138
- "10734 Catfood for senior cats \n",
139
- "25164 Internet safety for sports fans \n",
140
  "\n",
141
- " label label_id \n",
142
- "2461 Business_and_Industrial 12 \n",
143
- "10003 Business_and_Industrial 12 \n",
144
- "14189 Online Communities 20 \n",
145
- "19874 Law_and_Government 11 \n",
146
- "5676 Law_and_Government 11 \n",
147
- "27151 Autos_and_Vehicles 10 \n",
148
- "21837 Reference 25 \n",
149
- "5541 Adult 4 \n",
150
- "10734 Food_and_Drink 7 \n",
151
- "25164 Internet_and_Telecom 17 "
152
  ]
153
  },
154
  "execution_count": 3,
@@ -196,40 +196,40 @@
196
  " <tbody>\n",
197
  " <tr>\n",
198
  " <th>0</th>\n",
199
- " <td>Pet nutrition consulting for small mammal spec...</td>\n",
200
- " <td>26</td>\n",
201
  " </tr>\n",
202
  " <tr>\n",
203
  " <th>1</th>\n",
204
- " <td>Makeup for mature skin</td>\n",
205
- " <td>24</td>\n",
206
  " </tr>\n",
207
  " <tr>\n",
208
  " <th>2</th>\n",
209
- " <td>Volunteer opportunities near me</td>\n",
210
- " <td>1</td>\n",
211
  " </tr>\n",
212
  " <tr>\n",
213
  " <th>3</th>\n",
214
- " <td>Financial responsibility for college graduates</td>\n",
215
- " <td>21</td>\n",
216
  " </tr>\n",
217
  " <tr>\n",
218
  " <th>4</th>\n",
219
- " <td>Distance learning</td>\n",
220
- " <td>19</td>\n",
221
  " </tr>\n",
222
  " </tbody>\n",
223
  "</table>\n",
224
  "</div>"
225
  ],
226
  "text/plain": [
227
- " category label_id\n",
228
- "0 Pet nutrition consulting for small mammal spec... 26\n",
229
- "1 Makeup for mature skin 24\n",
230
- "2 Volunteer opportunities near me 1\n",
231
- "3 Financial responsibility for college graduates 21\n",
232
- "4 Distance learning 19"
233
  ]
234
  },
235
  "execution_count": 4,
@@ -250,8 +250,8 @@
250
  {
251
  "data": {
252
  "text/plain": [
253
- "False 20792\n",
254
- "True 11038\n",
255
  "Name: count, dtype: int64"
256
  ]
257
  },
@@ -273,7 +273,7 @@
273
  "name": "stderr",
274
  "output_type": "stream",
275
  "text": [
276
- "/tmp/ipykernel_122572/984288843.py:1: SettingWithCopyWarning: \n",
277
  "A value is trying to be set on a copy of a slice from a DataFrame\n",
278
  "\n",
279
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
@@ -307,54 +307,54 @@
307
  " </thead>\n",
308
  " <tbody>\n",
309
  " <tr>\n",
310
- " <th>18501</th>\n",
311
- " <td>Vegan diet myths</td>\n",
312
- " <td>24</td>\n",
313
  " </tr>\n",
314
  " <tr>\n",
315
- " <th>5596</th>\n",
316
- " <td>Game industry news</td>\n",
317
- " <td>8</td>\n",
318
  " </tr>\n",
319
  " <tr>\n",
320
- " <th>31812</th>\n",
321
- " <td>Sports Team Fan Support</td>\n",
322
- " <td>5</td>\n",
323
  " </tr>\n",
324
  " <tr>\n",
325
- " <th>31249</th>\n",
326
- " <td>free Granny</td>\n",
327
- " <td>4</td>\n",
328
  " </tr>\n",
329
  " <tr>\n",
330
- " <th>19536</th>\n",
331
- " <td>Travel destination monastic retreats</td>\n",
332
- " <td>2</td>\n",
333
  " </tr>\n",
334
  " <tr>\n",
335
- " <th>29460</th>\n",
336
- " <td>Sports statistics</td>\n",
337
- " <td>8</td>\n",
338
  " </tr>\n",
339
  " <tr>\n",
340
- " <th>12554</th>\n",
341
- " <td>Online payment systems</td>\n",
342
- " <td>16</td>\n",
343
  " </tr>\n",
344
  " <tr>\n",
345
- " <th>26502</th>\n",
346
- " <td>eSports Game Esports Player Fan Engagement Ini...</td>\n",
347
- " <td>5</td>\n",
348
  " </tr>\n",
349
  " <tr>\n",
350
- " <th>24910</th>\n",
351
- " <td>Financial empowerment strategies for empowerment</td>\n",
352
- " <td>21</td>\n",
353
  " </tr>\n",
354
  " <tr>\n",
355
- " <th>20072</th>\n",
356
- " <td>Kickboxing gloves</td>\n",
357
- " <td>0</td>\n",
358
  " </tr>\n",
359
  " </tbody>\n",
360
  "</table>\n",
@@ -362,16 +362,16 @@
362
  ],
363
  "text/plain": [
364
  " text label\n",
365
- "18501 Vegan diet myths 24\n",
366
- "5596 Game industry news 8\n",
367
- "31812 Sports Team Fan Support 5\n",
368
- "31249 free Granny 4\n",
369
- "19536 Travel destination monastic retreats 2\n",
370
- "29460 Sports statistics 8\n",
371
- "12554 Online payment systems 16\n",
372
- "26502 eSports Game Esports Player Fan Engagement Ini... 5\n",
373
- "24910 Financial empowerment strategies for empowerment 21\n",
374
- "20072 Kickboxing gloves 0"
375
  ]
376
  },
377
  "execution_count": 6,
@@ -409,7 +409,7 @@
409
  "text/plain": [
410
  "Dataset({\n",
411
  " features: ['text', 'label'],\n",
412
- " num_rows: 31830\n",
413
  "})"
414
  ]
415
  },
@@ -434,11 +434,11 @@
434
  "DatasetDict({\n",
435
  " train: Dataset({\n",
436
  " features: ['text', 'label'],\n",
437
- " num_rows: 25464\n",
438
  " })\n",
439
  " test: Dataset({\n",
440
  " features: ['text', 'label'],\n",
441
- " num_rows: 6366\n",
442
  " })\n",
443
  "})"
444
  ]
@@ -483,8 +483,8 @@
483
  "name": "stderr",
484
  "output_type": "stream",
485
  "text": [
486
- "Map: 100%|██████████| 25464/25464 [00:00<00:00, 33634.70 examples/s]\n",
487
- "Map: 100%|██████████| 6366/6366 [00:00<00:00, 37230.41 examples/s]\n"
488
  ]
489
  }
490
  ],
@@ -501,9 +501,9 @@
501
  "name": "stderr",
502
  "output_type": "stream",
503
  "text": [
504
- "2023-10-12 07:17:13.000135: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
505
  "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
506
- "2023-10-12 07:17:14.376613: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
507
  ]
508
  }
509
  ],
@@ -563,33 +563,33 @@
563
  {
564
  "data": {
565
  "text/plain": [
566
- "{'Beauty_and_Fitness': 0,\n",
567
- " 'People_and_Society': 1,\n",
568
- " 'Travel_and_Transportation': 2,\n",
569
- " 'Shopping': 3,\n",
570
- " 'Adult': 4,\n",
571
- " 'Sports': 5,\n",
572
- " 'Science': 6,\n",
573
- " 'Food_and_Drink': 7,\n",
574
- " 'News': 8,\n",
575
- " 'Sensitive Subjects': 9,\n",
576
- " 'Autos_and_Vehicles': 10,\n",
577
- " 'Law_and_Government': 11,\n",
578
- " 'Business_and_Industrial': 12,\n",
579
- " 'Health': 13,\n",
580
- " 'Real Estate': 14,\n",
581
- " 'Books_and_Literature': 15,\n",
582
- " 'Computers_and_Electronics': 16,\n",
583
- " 'Internet_and_Telecom': 17,\n",
584
- " 'Home_and_Garden': 18,\n",
585
- " 'Jobs_and_Education': 19,\n",
586
- " 'Online Communities': 20,\n",
587
- " 'Finance': 21,\n",
588
  " 'Arts_and_Entertainment': 22,\n",
589
- " 'Games': 23,\n",
590
- " 'Hobbies_and_Leisure': 24,\n",
591
- " 'Reference': 25,\n",
592
- " 'Pets_and_Animals': 26}"
593
  ]
594
  },
595
  "execution_count": 16,
@@ -612,33 +612,33 @@
612
  {
613
  "data": {
614
  "text/plain": [
615
- "{0: 'Beauty_and_Fitness',\n",
616
- " 1: 'People_and_Society',\n",
617
- " 2: 'Travel_and_Transportation',\n",
618
- " 3: 'Shopping',\n",
619
- " 4: 'Adult',\n",
620
- " 5: 'Sports',\n",
621
- " 6: 'Science',\n",
622
- " 7: 'Food_and_Drink',\n",
623
- " 8: 'News',\n",
624
- " 9: 'Sensitive Subjects',\n",
625
- " 10: 'Autos_and_Vehicles',\n",
626
- " 11: 'Law_and_Government',\n",
627
- " 12: 'Business_and_Industrial',\n",
628
- " 13: 'Health',\n",
629
- " 14: 'Real Estate',\n",
630
- " 15: 'Books_and_Literature',\n",
631
- " 16: 'Computers_and_Electronics',\n",
632
- " 17: 'Internet_and_Telecom',\n",
633
- " 18: 'Home_and_Garden',\n",
634
- " 19: 'Jobs_and_Education',\n",
635
- " 20: 'Online Communities',\n",
636
- " 21: 'Finance',\n",
637
  " 22: 'Arts_and_Entertainment',\n",
638
- " 23: 'Games',\n",
639
- " 24: 'Hobbies_and_Leisure',\n",
640
- " 25: 'Reference',\n",
641
- " 26: 'Pets_and_Animals'}"
642
  ]
643
  },
644
  "execution_count": 17,
@@ -685,8 +685,8 @@
685
  "\n",
686
  " <div>\n",
687
  " \n",
688
- " <progress value='3184' max='3184' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
689
- " [3184/3184 01:41, Epoch 2/2]\n",
690
  " </div>\n",
691
  " <table border=\"1\" class=\"dataframe\">\n",
692
  " <thead>\n",
@@ -700,15 +700,15 @@
700
  " <tbody>\n",
701
  " <tr>\n",
702
  " <td>1</td>\n",
703
- " <td>0.098900</td>\n",
704
- " <td>0.105882</td>\n",
705
- " <td>0.972196</td>\n",
706
  " </tr>\n",
707
  " <tr>\n",
708
  " <td>2</td>\n",
709
- " <td>0.060600</td>\n",
710
- " <td>0.105043</td>\n",
711
- " <td>0.973296</td>\n",
712
  " </tr>\n",
713
  " </tbody>\n",
714
  "</table><p>"
@@ -723,7 +723,7 @@
723
  {
724
  "data": {
725
  "text/plain": [
726
- "TrainOutput(global_step=3184, training_loss=0.08544207278208517, metrics={'train_runtime': 102.0004, 'train_samples_per_second': 499.292, 'train_steps_per_second': 31.216, 'total_flos': 204535951167600.0, 'train_loss': 0.08544207278208517, 'epoch': 2.0})"
727
  ]
728
  },
729
  "execution_count": 19,
 
62
  " </thead>\n",
63
  " <tbody>\n",
64
  " <tr>\n",
65
+ " <th>30126</th>\n",
66
+ " <td>Farmers market products</td>\n",
67
+ " <td>Food_and_Drink</td>\n",
68
+ " <td>15</td>\n",
69
  " </tr>\n",
70
  " <tr>\n",
71
+ " <th>14239</th>\n",
72
+ " <td>Political rallies</td>\n",
73
+ " <td>News</td>\n",
74
+ " <td>1</td>\n",
75
  " </tr>\n",
76
  " <tr>\n",
77
+ " <th>20410</th>\n",
78
+ " <td>Diversity celebrations</td>\n",
79
+ " <td>People_and_Society</td>\n",
80
+ " <td>10</td>\n",
81
  " </tr>\n",
82
  " <tr>\n",
83
+ " <th>1446</th>\n",
84
+ " <td>Remote work and remote project management</td>\n",
85
+ " <td>Jobs_and_Education</td>\n",
86
+ " <td>21</td>\n",
87
  " </tr>\n",
88
  " <tr>\n",
89
+ " <th>6985</th>\n",
90
+ " <td>Industrial equipment suppliers</td>\n",
91
+ " <td>Business_and_Industrial</td>\n",
92
  " <td>11</td>\n",
93
  " </tr>\n",
94
  " <tr>\n",
95
+ " <th>30906</th>\n",
96
+ " <td>Guided sleep meditation</td>\n",
97
+ " <td>Beauty_and_Fitness</td>\n",
98
+ " <td>9</td>\n",
99
  " </tr>\n",
100
  " <tr>\n",
101
+ " <th>4351</th>\n",
102
+ " <td>VPN for business</td>\n",
103
+ " <td>Internet_and_Telecom</td>\n",
104
  " <td>25</td>\n",
105
  " </tr>\n",
106
  " <tr>\n",
107
+ " <th>8599</th>\n",
108
+ " <td>Razer Kraken ear cushions</td>\n",
109
+ " <td>Computers_and_Electronics</td>\n",
110
+ " <td>7</td>\n",
111
  " </tr>\n",
112
  " <tr>\n",
113
+ " <th>28322</th>\n",
114
+ " <td>Citation context organization platforms</td>\n",
115
+ " <td>Reference</td>\n",
116
+ " <td>12</td>\n",
117
  " </tr>\n",
118
  " <tr>\n",
119
+ " <th>5368</th>\n",
120
+ " <td>Quality Porn Videos</td>\n",
121
+ " <td>Adult</td>\n",
122
+ " <td>6</td>\n",
123
  " </tr>\n",
124
  " </tbody>\n",
125
  "</table>\n",
126
  "</div>"
127
  ],
128
  "text/plain": [
129
+ " category label \\\n",
130
+ "30126 Farmers market products Food_and_Drink \n",
131
+ "14239 Political rallies News \n",
132
+ "20410 Diversity celebrations People_and_Society \n",
133
+ "1446 Remote work and remote project management Jobs_and_Education \n",
134
+ "6985 Industrial equipment suppliers Business_and_Industrial \n",
135
+ "30906 Guided sleep meditation Beauty_and_Fitness \n",
136
+ "4351 VPN for business Internet_and_Telecom \n",
137
+ "8599 Razer Kraken ear cushions Computers_and_Electronics \n",
138
+ "28322 Citation context organization platforms Reference \n",
139
+ "5368 Quality Porn Videos Adult \n",
140
  "\n",
141
+ " label_id \n",
142
+ "30126 15 \n",
143
+ "14239 1 \n",
144
+ "20410 10 \n",
145
+ "1446 21 \n",
146
+ "6985 11 \n",
147
+ "30906 9 \n",
148
+ "4351 25 \n",
149
+ "8599 7 \n",
150
+ "28322 12 \n",
151
+ "5368 6 "
152
  ]
153
  },
154
  "execution_count": 3,
 
196
  " <tbody>\n",
197
  " <tr>\n",
198
  " <th>0</th>\n",
199
+ " <td>DIY woodworking projects</td>\n",
200
+ " <td>20</td>\n",
201
  " </tr>\n",
202
  " <tr>\n",
203
  " <th>1</th>\n",
204
+ " <td>Music festivals lineup leaks</td>\n",
205
+ " <td>22</td>\n",
206
  " </tr>\n",
207
  " <tr>\n",
208
  " <th>2</th>\n",
209
+ " <td>Sports Team Fan Love</td>\n",
210
+ " <td>26</td>\n",
211
  " </tr>\n",
212
  " <tr>\n",
213
  " <th>3</th>\n",
214
+ " <td>Food portion control and portion control apps</td>\n",
215
+ " <td>15</td>\n",
216
  " </tr>\n",
217
  " <tr>\n",
218
  " <th>4</th>\n",
219
+ " <td>Planting flower beds</td>\n",
220
+ " <td>20</td>\n",
221
  " </tr>\n",
222
  " </tbody>\n",
223
  "</table>\n",
224
  "</div>"
225
  ],
226
  "text/plain": [
227
+ " category label_id\n",
228
+ "0 DIY woodworking projects 20\n",
229
+ "1 Music festivals lineup leaks 22\n",
230
+ "2 Sports Team Fan Love 26\n",
231
+ "3 Food portion control and portion control apps 15\n",
232
+ "4 Planting flower beds 20"
233
  ]
234
  },
235
  "execution_count": 4,
 
250
  {
251
  "data": {
252
  "text/plain": [
253
+ "False 21064\n",
254
+ "True 11044\n",
255
  "Name: count, dtype: int64"
256
  ]
257
  },
 
273
  "name": "stderr",
274
  "output_type": "stream",
275
  "text": [
276
+ "/tmp/ipykernel_123522/984288843.py:1: SettingWithCopyWarning: \n",
277
  "A value is trying to be set on a copy of a slice from a DataFrame\n",
278
  "\n",
279
  "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
 
307
  " </thead>\n",
308
  " <tbody>\n",
309
  " <tr>\n",
310
+ " <th>22892</th>\n",
311
+ " <td>Business data analysis tools</td>\n",
312
+ " <td>11</td>\n",
313
  " </tr>\n",
314
  " <tr>\n",
315
+ " <th>26952</th>\n",
316
+ " <td>Movie posters minimalist iconic film symbols a...</td>\n",
317
+ " <td>22</td>\n",
318
  " </tr>\n",
319
  " <tr>\n",
320
+ " <th>27699</th>\n",
321
+ " <td>Sports Team Fan Parties</td>\n",
322
+ " <td>26</td>\n",
323
  " </tr>\n",
324
  " <tr>\n",
325
+ " <th>6288</th>\n",
326
+ " <td>Collectible vintage items and antiques</td>\n",
327
+ " <td>13</td>\n",
328
  " </tr>\n",
329
  " <tr>\n",
330
+ " <th>22173</th>\n",
331
+ " <td>Skin rejuvenation treatments and procedures</td>\n",
332
+ " <td>9</td>\n",
333
  " </tr>\n",
334
  " <tr>\n",
335
+ " <th>13124</th>\n",
336
+ " <td>Poetry analysis guidelines</td>\n",
337
+ " <td>22</td>\n",
338
  " </tr>\n",
339
  " <tr>\n",
340
+ " <th>20269</th>\n",
341
+ " <td>Health Education for Men</td>\n",
342
+ " <td>4</td>\n",
343
  " </tr>\n",
344
  " <tr>\n",
345
+ " <th>10112</th>\n",
346
+ " <td>MacBook Pro Ports</td>\n",
347
+ " <td>7</td>\n",
348
  " </tr>\n",
349
  " <tr>\n",
350
+ " <th>31312</th>\n",
351
+ " <td>Mixology equipment for home bartenders and mix...</td>\n",
352
+ " <td>15</td>\n",
353
  " </tr>\n",
354
  " <tr>\n",
355
+ " <th>30209</th>\n",
356
+ " <td>Poetry analysis examples with explanations</td>\n",
357
+ " <td>22</td>\n",
358
  " </tr>\n",
359
  " </tbody>\n",
360
  "</table>\n",
 
362
  ],
363
  "text/plain": [
364
  " text label\n",
365
+ "22892 Business data analysis tools 11\n",
366
+ "26952 Movie posters minimalist iconic film symbols a... 22\n",
367
+ "27699 Sports Team Fan Parties 26\n",
368
+ "6288 Collectible vintage items and antiques 13\n",
369
+ "22173 Skin rejuvenation treatments and procedures 9\n",
370
+ "13124 Poetry analysis guidelines 22\n",
371
+ "20269 Health Education for Men 4\n",
372
+ "10112 MacBook Pro Ports 7\n",
373
+ "31312 Mixology equipment for home bartenders and mix... 15\n",
374
+ "30209 Poetry analysis examples with explanations 22"
375
  ]
376
  },
377
  "execution_count": 6,
 
409
  "text/plain": [
410
  "Dataset({\n",
411
  " features: ['text', 'label'],\n",
412
+ " num_rows: 32108\n",
413
  "})"
414
  ]
415
  },
 
434
  "DatasetDict({\n",
435
  " train: Dataset({\n",
436
  " features: ['text', 'label'],\n",
437
+ " num_rows: 25686\n",
438
  " })\n",
439
  " test: Dataset({\n",
440
  " features: ['text', 'label'],\n",
441
+ " num_rows: 6422\n",
442
  " })\n",
443
  "})"
444
  ]
 
483
  "name": "stderr",
484
  "output_type": "stream",
485
  "text": [
486
+ "Map: 100%|██████████| 25686/25686 [00:00<00:00, 33313.88 examples/s]\n",
487
+ "Map: 100%|██████████| 6422/6422 [00:00<00:00, 41958.07 examples/s]\n"
488
  ]
489
  }
490
  ],
 
501
  "name": "stderr",
502
  "output_type": "stream",
503
  "text": [
504
+ "2023-10-12 07:34:40.359726: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
505
  "To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
506
+ "2023-10-12 07:34:41.887700: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT\n"
507
  ]
508
  }
509
  ],
 
563
  {
564
  "data": {
565
  "text/plain": [
566
+ "{'Hobbies_and_Leisure': 0,\n",
567
+ " 'News': 1,\n",
568
+ " 'Science': 2,\n",
569
+ " 'Autos_and_Vehicles': 3,\n",
570
+ " 'Health': 4,\n",
571
+ " 'Pets_and_Animals': 5,\n",
572
+ " 'Adult': 6,\n",
573
+ " 'Computers_and_Electronics': 7,\n",
574
+ " 'Online Communities': 8,\n",
575
+ " 'Beauty_and_Fitness': 9,\n",
576
+ " 'People_and_Society': 10,\n",
577
+ " 'Business_and_Industrial': 11,\n",
578
+ " 'Reference': 12,\n",
579
+ " 'Shopping': 13,\n",
580
+ " 'Travel_and_Transportation': 14,\n",
581
+ " 'Food_and_Drink': 15,\n",
582
+ " 'Law_and_Government': 16,\n",
583
+ " 'Books_and_Literature': 17,\n",
584
+ " 'Finance': 18,\n",
585
+ " 'Games': 19,\n",
586
+ " 'Home_and_Garden': 20,\n",
587
+ " 'Jobs_and_Education': 21,\n",
588
  " 'Arts_and_Entertainment': 22,\n",
589
+ " 'Sensitive Subjects': 23,\n",
590
+ " 'Real Estate': 24,\n",
591
+ " 'Internet_and_Telecom': 25,\n",
592
+ " 'Sports': 26}"
593
  ]
594
  },
595
  "execution_count": 16,
 
612
  {
613
  "data": {
614
  "text/plain": [
615
+ "{0: 'Hobbies_and_Leisure',\n",
616
+ " 1: 'News',\n",
617
+ " 2: 'Science',\n",
618
+ " 3: 'Autos_and_Vehicles',\n",
619
+ " 4: 'Health',\n",
620
+ " 5: 'Pets_and_Animals',\n",
621
+ " 6: 'Adult',\n",
622
+ " 7: 'Computers_and_Electronics',\n",
623
+ " 8: 'Online Communities',\n",
624
+ " 9: 'Beauty_and_Fitness',\n",
625
+ " 10: 'People_and_Society',\n",
626
+ " 11: 'Business_and_Industrial',\n",
627
+ " 12: 'Reference',\n",
628
+ " 13: 'Shopping',\n",
629
+ " 14: 'Travel_and_Transportation',\n",
630
+ " 15: 'Food_and_Drink',\n",
631
+ " 16: 'Law_and_Government',\n",
632
+ " 17: 'Books_and_Literature',\n",
633
+ " 18: 'Finance',\n",
634
+ " 19: 'Games',\n",
635
+ " 20: 'Home_and_Garden',\n",
636
+ " 21: 'Jobs_and_Education',\n",
637
  " 22: 'Arts_and_Entertainment',\n",
638
+ " 23: 'Sensitive Subjects',\n",
639
+ " 24: 'Real Estate',\n",
640
+ " 25: 'Internet_and_Telecom',\n",
641
+ " 26: 'Sports'}"
642
  ]
643
  },
644
  "execution_count": 17,
 
685
  "\n",
686
  " <div>\n",
687
  " \n",
688
+ " <progress value='3212' max='3212' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
689
+ " [3212/3212 01:44, Epoch 2/2]\n",
690
  " </div>\n",
691
  " <table border=\"1\" class=\"dataframe\">\n",
692
  " <thead>\n",
 
700
  " <tbody>\n",
701
  " <tr>\n",
702
  " <td>1</td>\n",
703
+ " <td>0.133700</td>\n",
704
+ " <td>0.118841</td>\n",
705
+ " <td>0.973684</td>\n",
706
  " </tr>\n",
707
  " <tr>\n",
708
  " <td>2</td>\n",
709
+ " <td>0.096300</td>\n",
710
+ " <td>0.122655</td>\n",
711
+ " <td>0.973217</td>\n",
712
  " </tr>\n",
713
  " </tbody>\n",
714
  "</table><p>"
 
723
  {
724
  "data": {
725
  "text/plain": [
726
+ "TrainOutput(global_step=3212, training_loss=0.2577073345445607, metrics={'train_runtime': 105.4831, 'train_samples_per_second': 487.016, 'train_steps_per_second': 30.45, 'total_flos': 202880405807352.0, 'train_loss': 0.2577073345445607, 'epoch': 2.0})"
727
  ]
728
  },
729
  "execution_count": 19,
research/09_inference.html CHANGED
@@ -7475,7 +7475,7 @@ a.anchor-link {
7475
  </style>
7476
  <!-- End of mermaid configuration --></head>
7477
  <body class="jp-Notebook" data-jp-theme-light="true" data-jp-theme-name="JupyterLab Light">
7478
- <main><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
7479
  <div class="jp-Cell-inputWrapper" tabindex="0">
7480
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7481
  </div>
@@ -7492,25 +7492,7 @@ a.anchor-link {
7492
  </div>
7493
  </div>
7494
  </div>
7495
- <div class="jp-Cell-outputWrapper">
7496
- <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
7497
- </div>
7498
- <div class="jp-OutputArea jp-Cell-outputArea">
7499
- <div class="jp-OutputArea-child">
7500
- <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
7501
- <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="application/vnd.jupyter.stderr" tabindex="0">
7502
- <pre>/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
7503
- from .autonotebook import tqdm as notebook_tqdm
7504
- 2023-10-12 05:59:27.575495: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
7505
- To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
7506
- 2023-10-12 05:59:28.314367: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT
7507
- Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
7508
- </pre>
7509
- </div>
7510
- </div>
7511
- </div>
7512
- </div>
7513
- </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
7514
  <div class="jp-Cell-inputWrapper" tabindex="0">
7515
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7516
  </div>
@@ -7526,19 +7508,7 @@ Special tokens have been added in the vocabulary, make sure the associated word
7526
  </div>
7527
  </div>
7528
  </div>
7529
- <div class="jp-Cell-outputWrapper">
7530
- <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
7531
- </div>
7532
- <div class="jp-OutputArea jp-Cell-outputArea">
7533
- <div class="jp-OutputArea-child jp-OutputArea-executeResult">
7534
- <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
7535
- <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
7536
- <pre>[{'label': 'Computers_and_Electronics', 'score': 0.9999090433120728}]</pre>
7537
- </div>
7538
- </div>
7539
- </div>
7540
- </div>
7541
- </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
7542
  <div class="jp-Cell-inputWrapper" tabindex="0">
7543
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7544
  </div>
@@ -7554,19 +7524,7 @@ Special tokens have been added in the vocabulary, make sure the associated word
7554
  </div>
7555
  </div>
7556
  </div>
7557
- <div class="jp-Cell-outputWrapper">
7558
- <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
7559
- </div>
7560
- <div class="jp-OutputArea jp-Cell-outputArea">
7561
- <div class="jp-OutputArea-child jp-OutputArea-executeResult">
7562
- <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
7563
- <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
7564
- <pre>[{'label': 'Health', 'score': 0.49160146713256836}]</pre>
7565
- </div>
7566
- </div>
7567
- </div>
7568
- </div>
7569
- </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
7570
  <div class="jp-Cell-inputWrapper" tabindex="0">
7571
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7572
  </div>
@@ -7582,18 +7540,6 @@ Special tokens have been added in the vocabulary, make sure the associated word
7582
  </div>
7583
  </div>
7584
  </div>
7585
- <div class="jp-Cell-outputWrapper">
7586
- <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
7587
- </div>
7588
- <div class="jp-OutputArea jp-Cell-outputArea">
7589
- <div class="jp-OutputArea-child jp-OutputArea-executeResult">
7590
- <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
7591
- <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
7592
- <pre>[{'label': 'Computers_and_Electronics', 'score': 0.9995001554489136}]</pre>
7593
- </div>
7594
- </div>
7595
- </div>
7596
- </div>
7597
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
7598
  <div class="jp-Cell-inputWrapper" tabindex="0">
7599
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
@@ -7619,7 +7565,7 @@ Special tokens have been added in the vocabulary, make sure the associated word
7619
  </div>
7620
  </div>
7621
  </div>
7622
- </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
7623
  <div class="jp-Cell-inputWrapper" tabindex="0">
7624
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7625
  </div>
@@ -7628,11 +7574,24 @@ Special tokens have been added in the vocabulary, make sure the associated word
7628
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
7629
  <div class="cm-editor cm-s-jupyter">
7630
  <div class="highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">os</span><span class="p">;</span> <span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="s1">'..'</span><span class="p">)</span>
 
7631
  </pre></div>
7632
  </div>
7633
  </div>
7634
  </div>
7635
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
7636
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
7637
  <div class="jp-Cell-inputWrapper" tabindex="0">
7638
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
@@ -7668,7 +7627,7 @@ Special tokens have been added in the vocabulary, make sure the associated word
7668
  <span class="kn">from</span> <span class="nn">torch.nn</span> <span class="kn">import</span> <span class="n">functional</span> <span class="k">as</span> <span class="n">F</span>
7669
 
7670
 
7671
- <span class="n">model_name</span><span class="o">=</span> <span class="s2">"finetuned_entity_categorical_classification/checkpoint-23355"</span>
7672
  <span class="n">tokenizer</span> <span class="o">=</span> <span class="n">AutoTokenizer</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="n">model_name</span><span class="p">)</span>
7673
 
7674
  <span class="n">model</span> <span class="o">=</span> <span class="n">AutoModelForSequenceClassification</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="n">model_name</span><span class="p">)</span>
@@ -7708,14 +7667,20 @@ Special tokens have been added in the vocabulary, make sure the associated word
7708
  <span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
7709
  <span class="n">logits</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span><span class="o">**</span><span class="n">inputs</span><span class="p">)</span><span class="o">.</span><span class="n">logits</span>
7710
 
7711
- <span class="nb">print</span><span class="p">(</span><span class="s2">"logits: "</span><span class="p">,</span> <span class="n">logits</span><span class="p">)</span>
7712
  <span class="n">predicted_class_id</span> <span class="o">=</span> <span class="n">logits</span><span class="o">.</span><span class="n">argmax</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">()</span>
7713
  <span class="c1"># get probabilities using softmax from logit score and convert it to numpy array</span>
7714
  <span class="n">probabilities_scores</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">dim</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">numpy</span><span class="p">()[</span><span class="mi">0</span><span class="p">]</span>
 
7715
  <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">27</span><span class="p">):</span>
7716
- <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"P(</span><span class="si">{</span><span class="n">id2label</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="si">}</span><span class="s2">): </span><span class="si">{</span><span class="n">probabilities_scores</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span>
 
 
 
 
7717
 
7718
- <span class="nb">print</span><span class="p">(</span><span class="s2">"Predicted Class: "</span><span class="p">,</span> <span class="n">model</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">id2label</span><span class="p">[</span><span class="n">predicted_class_id</span><span class="p">])</span>
 
7719
 
7720
 
7721
 
@@ -7745,41 +7710,44 @@ Special tokens have been added in the vocabulary, make sure the associated word
7745
  <div class="jp-OutputArea-child">
7746
  <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
7747
  <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
7748
- <pre>logits: tensor([[-1.4210, -4.3130, -4.1497, -1.9217, -3.3253, -2.3839, -3.2943, -4.6091,
7749
- -1.9258, -3.6359, -3.7877, -4.2664, -5.1229, -1.8067, -4.4068, -4.5855,
7750
- 10.0302, 0.0293, -2.0481, -5.8791, -3.7072, -3.1037, -4.1602, -0.8520,
7751
- -3.6628, -4.5927, -4.0272]])
7752
- P(Beauty_and_Fitness): 1.0635108992573805e-05
7753
- P(People_and_Society): 5.899146344745532e-07
7754
- P(Travel_and_Transportation): 6.945512041056645e-07
7755
- P(Shopping): 6.446343832067214e-06
7756
- P(Adult): 1.583859898346418e-06
7757
- P(Sports): 4.060307219333481e-06
7758
- P(Science): 1.6337769466190366e-06
7759
- P(Food_and_Drink): 4.3873527033611026e-07
7760
- P(News): 6.419656983780442e-06
7761
- P(Sensitive Subjects): 1.1609599823714234e-06
7762
- P(Autos_and_Vehicles): 9.975190096156439e-07
7763
- P(Law_and_Government): 6.180094374030887e-07
7764
- P(Business_and_Industrial): 2.6243591833008395e-07
7765
- P(Health): 7.231980362121249e-06
7766
- P(Real Estate): 5.370690701056446e-07
7767
- P(Books_and_Literature): 4.492034122449695e-07
7768
- P(Computers_and_Electronics): 0.9998801946640015
7769
- P(Internet_and_Telecom): 4.535169500741176e-05
7770
- P(Home_and_Garden): 5.680800768459449e-06
7771
- P(Jobs_and_Education): 1.2321044096097467e-07
7772
- P(Online Communities): 1.081151822290849e-06
7773
- P(Finance): 1.976913608814357e-06
7774
- P(Arts_and_Entertainment): 6.872939479762863e-07
7775
- P(Games): 1.8787852241075598e-05
7776
- P(Hobbies_and_Leisure): 1.1302184930173098e-06
7777
- P(Reference): 4.4596322368306573e-07
7778
- P(Pets_and_Animals): 7.850715633139771e-07
7779
- Predicted Class: Computers_and_Electronics
7780
  </pre>
7781
  </div>
7782
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7783
  </div>
7784
  </div>
7785
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
@@ -7803,41 +7771,44 @@ Predicted Class: Computers_and_Electronics
7803
  <div class="jp-OutputArea-child">
7804
  <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
7805
  <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
7806
- <pre>logits: tensor([[-0.0132, -3.1295, -4.6255, -4.0011, -6.0542, -1.8381, -2.5773, -0.6340,
7807
- -2.6285, -4.2835, -4.8787, -4.8718, -5.0328, 3.7794, -4.6336, -5.0319,
7808
- 1.0592, -4.5573, -3.8045, -5.2772, -5.0804, -3.2632, -4.1335, -3.5920,
7809
- -2.1358, -7.6210, 3.6940]])
7810
- P(Beauty_and_Fitness): 0.011079358868300915
7811
- P(People_and_Society): 0.0004910691641271114
7812
- P(Travel_and_Transportation): 0.00011000979429809377
7813
- P(Shopping): 0.00020539172692224383
7814
- P(Adult): 2.635990676935762e-05
7815
- P(Sports): 0.0017864161636680365
7816
- P(Science): 0.0008529641781933606
7817
- P(Food_and_Drink): 0.005955575965344906
7818
- P(News): 0.000810392084531486
7819
- P(Sensitive Subjects): 0.00015485959011130035
7820
- P(Autos_and_Vehicles): 8.5399005911313e-05
7821
- P(Law_and_Government): 8.598815475124866e-05
7822
- P(Business_and_Industrial): 7.320548320421949e-05
7823
- P(Health): 0.4916036128997803
7824
- P(Real Estate): 0.0001091243393602781
7825
- P(Books_and_Literature): 7.327288767555729e-05
7826
- P(Computers_and_Electronics): 0.03238002583384514
7827
- P(Internet_and_Telecom): 0.00011777772306231782
7828
- P(Home_and_Garden): 0.0002500169211998582
7829
- P(Jobs_and_Education): 5.733156285714358e-05
7830
- P(Online Communities): 6.979802856221795e-05
7831
- P(Finance): 0.00042960469727404416
7832
- P(Arts_and_Entertainment): 0.000179934679181315
7833
- P(Games): 0.00030923119629733264
7834
- P(Hobbies_and_Leisure): 0.0013263950822874904
7835
- P(Reference): 5.501774921867764e-06
7836
- P(Pets_and_Animals): 0.4513714015483856
7837
- Predicted Class: Health
7838
  </pre>
7839
  </div>
7840
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7841
  </div>
7842
  </div>
7843
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
@@ -7861,41 +7832,44 @@ Predicted Class: Health
7861
  <div class="jp-OutputArea-child">
7862
  <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
7863
  <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
7864
- <pre>logits: tensor([[ -0.9597, -3.8456, -3.1203, -1.9988, -5.4966, -3.5321, -2.2676,
7865
- 9.3689, 0.3687, -4.5561, -5.4510, -3.8708, -4.3223, 0.2038,
7866
- -3.1802, -3.6065, -3.8467, -4.6997, -3.8446, -4.4849, -4.4130,
7867
- -2.8653, -2.8191, -4.9874, -1.7339, -10.3458, -1.0289]])
7868
- P(Beauty_and_Fitness): 3.267208012402989e-05
7869
- P(People_and_Society): 1.8231645526611828e-06
7870
- P(Travel_and_Transportation): 3.7654806419595843e-06
7871
- P(Shopping): 1.1558237929421011e-05
7872
- P(Adult): 3.4979228757947567e-07
7873
- P(Sports): 2.4945670702436473e-06
7874
- P(Science): 8.83362372405827e-06
7875
- P(Food_and_Drink): 0.9996380805969238
7876
- P(News): 0.00012333830818533897
7877
- P(Sensitive Subjects): 8.959448223322397e-07
7878
- P(Autos_and_Vehicles): 3.6612007647818245e-07
7879
- P(Law_and_Government): 1.7778713754523778e-06
7880
- P(Business_and_Industrial): 1.13186013095401e-06
7881
- P(Health): 0.0001045860699377954
7882
- P(Real Estate): 3.5467155612423085e-06
7883
- P(Books_and_Literature): 2.3157517716754228e-06
7884
- P(Computers_and_Electronics): 1.821160935833177e-06
7885
- P(Internet_and_Telecom): 7.761184406263055e-07
7886
- P(Home_and_Garden): 1.8250555058330065e-06
7887
- P(Jobs_and_Education): 9.62060425990785e-07
7888
- P(Online Communities): 1.033720309351338e-06
7889
- P(Finance): 4.85956570628332e-06
7890
- P(Arts_and_Entertainment): 5.089193109597545e-06
7891
- P(Games): 5.820724595650972e-07
7892
- P(Hobbies_and_Leisure): 1.5064177205204032e-05
7893
- P(Reference): 2.740457638594762e-09
7894
- P(Pets_and_Animals): 3.0487293770420365e-05
7895
- Predicted Class: Food_and_Drink
7896
  </pre>
7897
  </div>
7898
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7899
  </div>
7900
  </div>
7901
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
@@ -7919,41 +7893,44 @@ Predicted Class: Food_and_Drink
7919
  <div class="jp-OutputArea-child">
7920
  <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
7921
  <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
7922
- <pre>logits: tensor([[-3.9272, -2.2786, -3.7970, -3.0280, -3.9465, -1.5384, -1.8026, -2.8501,
7923
- -2.0297, -4.1079, -3.4422, -3.7435, -3.6991, -2.9137, -2.7802, -4.9385,
7924
- -1.6897, -3.3684, -2.7991, -4.0702, -4.0103, -2.6430, -3.3914, -4.5762,
7925
- -2.0696, -7.2857, 10.6981]])
7926
- P(Beauty_and_Fitness): 4.4490917616712977e-07
7927
- P(People_and_Society): 2.3135853552957997e-06
7928
- P(Travel_and_Transportation): 5.068139330433041e-07
7929
- P(Shopping): 1.0934879810520215e-06
7930
- P(Adult): 4.364295307368593e-07
7931
- P(Sports): 4.849702690989943e-06
7932
- P(Science): 3.723783038367401e-06
7933
- P(Food_and_Drink): 1.306354533880949e-06
7934
- P(News): 2.9673019525944255e-06
7935
- P(Sensitive Subjects): 3.7138897823751904e-07
7936
- P(Autos_and_Vehicles): 7.226597631415643e-07
7937
- P(Law_and_Government): 5.346369107428472e-07
7938
- P(Business_and_Industrial): 5.58940200789948e-07
7939
- P(Health): 1.2258614106031018e-06
7940
- P(Real Estate): 1.4009098094902583e-06
7941
- P(Books_and_Literature): 1.6184709750177717e-07
7942
- P(Computers_and_Electronics): 4.168971827311907e-06
7943
- P(Internet_and_Telecom): 7.780183182148903e-07
7944
- P(Home_and_Garden): 1.3746708873441094e-06
7945
- P(Jobs_and_Education): 3.856556816117518e-07
7946
- P(Online Communities): 4.094476082627807e-07
7947
- P(Finance): 1.6070013089120039e-06
7948
- P(Arts_and_Entertainment): 7.603246672260866e-07
7949
- P(Games): 2.3251060099482856e-07
7950
- P(Hobbies_and_Leisure): 2.8512215521914186e-06
7951
- P(Reference): 1.5477359838200755e-08
7952
- P(Pets_and_Animals): 0.9999648332595825
7953
- Predicted Class: Pets_and_Animals
7954
  </pre>
7955
  </div>
7956
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7957
  </div>
7958
  </div>
7959
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
@@ -7977,41 +7954,351 @@ Predicted Class: Pets_and_Animals
7977
  <div class="jp-OutputArea-child">
7978
  <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
7979
  <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
7980
- <pre>logits: tensor([[-1.5957, -4.4011, -4.4274, -1.5319, -3.9572, -2.2991, -3.6216, -5.3450,
7981
- -2.7176, -3.9352, -4.0612, -4.6522, -5.7079, -1.6673, -4.3583, -4.3791,
7982
- 9.6847, -0.6290, -2.0402, -5.4800, -4.3648, -2.9588, -4.5169, -1.1335,
7983
- -3.7419, -4.2007, -4.0720]])
7984
- P(Beauty_and_Fitness): 1.2615569175977726e-05
7985
- P(People_and_Society): 7.630294476257404e-07
7986
- P(Travel_and_Transportation): 7.431989388351212e-07
7987
- P(Shopping): 1.3446799130178988e-05
7988
- P(Adult): 1.189393287859275e-06
7989
- P(Sports): 6.243569714570185e-06
7990
- P(Science): 1.6636606687825406e-06
7991
- P(Food_and_Drink): 2.969020158616331e-07
7992
- P(News): 4.108236680622213e-06
7993
- P(Sensitive Subjects): 1.2158897106928634e-06
7994
- P(Autos_and_Vehicles): 1.0719173815232352e-06
7995
- P(Law_and_Government): 5.935727926953405e-07
7996
- P(Business_and_Industrial): 2.0653641286116908e-07
7997
- P(Health): 1.1743918548745569e-05
7998
- P(Real Estate): 7.964112000991008e-07
7999
- P(Books_and_Literature): 7.800075536579243e-07
8000
- P(Computers_and_Electronics): 0.9998729228973389
8001
- P(Internet_and_Telecom): 3.316840957268141e-05
8002
- P(Home_and_Garden): 8.08808999863686e-06
8003
- P(Jobs_and_Education): 2.5939584702427965e-07
8004
- P(Online Communities): 7.912186674730037e-07
8005
- P(Finance): 3.2278749131364748e-06
8006
- P(Arts_and_Entertainment): 6.795915510338091e-07
8007
- P(Games): 2.00279555429006e-05
8008
- P(Hobbies_and_Leisure): 1.4751183243788546e-06
8009
- P(Reference): 9.323083531853626e-07
8010
- P(Pets_and_Animals): 1.060413751474698e-06
8011
- Predicted Class: Computers_and_Electronics
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8012
  </pre>
8013
  </div>
8014
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8015
  </div>
8016
  </div>
8017
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
 
7475
  </style>
7476
  <!-- End of mermaid configuration --></head>
7477
  <body class="jp-Notebook" data-jp-theme-light="true" data-jp-theme-name="JupyterLab Light">
7478
+ <main><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
7479
  <div class="jp-Cell-inputWrapper" tabindex="0">
7480
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7481
  </div>
 
7492
  </div>
7493
  </div>
7494
  </div>
7495
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7496
  <div class="jp-Cell-inputWrapper" tabindex="0">
7497
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7498
  </div>
 
7508
  </div>
7509
  </div>
7510
  </div>
7511
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
 
 
 
 
 
 
 
 
 
 
 
 
7512
  <div class="jp-Cell-inputWrapper" tabindex="0">
7513
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7514
  </div>
 
7524
  </div>
7525
  </div>
7526
  </div>
7527
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
 
 
 
 
 
 
 
 
 
 
 
 
7528
  <div class="jp-Cell-inputWrapper" tabindex="0">
7529
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7530
  </div>
 
7540
  </div>
7541
  </div>
7542
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
7543
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
7544
  <div class="jp-Cell-inputWrapper" tabindex="0">
7545
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
 
7565
  </div>
7566
  </div>
7567
  </div>
7568
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
7569
  <div class="jp-Cell-inputWrapper" tabindex="0">
7570
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
7571
  </div>
 
7574
  <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
7575
  <div class="cm-editor cm-s-jupyter">
7576
  <div class="highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">os</span><span class="p">;</span> <span class="n">os</span><span class="o">.</span><span class="n">chdir</span><span class="p">(</span><span class="s1">'..'</span><span class="p">)</span>
7577
+ <span class="o">%</span><span class="k">pwd</span>
7578
  </pre></div>
7579
  </div>
7580
  </div>
7581
  </div>
7582
  </div>
7583
+ <div class="jp-Cell-outputWrapper">
7584
+ <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
7585
+ </div>
7586
+ <div class="jp-OutputArea jp-Cell-outputArea">
7587
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
7588
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
7589
+ <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
7590
+ <pre>'/home/ubuntu/SentenceStructureComparision'</pre>
7591
+ </div>
7592
+ </div>
7593
+ </div>
7594
+ </div>
7595
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
7596
  <div class="jp-Cell-inputWrapper" tabindex="0">
7597
  <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
 
7627
  <span class="kn">from</span> <span class="nn">torch.nn</span> <span class="kn">import</span> <span class="n">functional</span> <span class="k">as</span> <span class="n">F</span>
7628
 
7629
 
7630
+ <span class="n">model_name</span><span class="o">=</span> <span class="s2">"finetuned_entity_categorical_classification/checkpoint-3212"</span>
7631
  <span class="n">tokenizer</span> <span class="o">=</span> <span class="n">AutoTokenizer</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="n">model_name</span><span class="p">)</span>
7632
 
7633
  <span class="n">model</span> <span class="o">=</span> <span class="n">AutoModelForSequenceClassification</span><span class="o">.</span><span class="n">from_pretrained</span><span class="p">(</span><span class="n">model_name</span><span class="p">)</span>
 
7667
  <span class="k">with</span> <span class="n">torch</span><span class="o">.</span><span class="n">no_grad</span><span class="p">():</span>
7668
  <span class="n">logits</span> <span class="o">=</span> <span class="n">model</span><span class="p">(</span><span class="o">**</span><span class="n">inputs</span><span class="p">)</span><span class="o">.</span><span class="n">logits</span>
7669
 
7670
+ <span class="c1"># print("logits: ", logits)</span>
7671
  <span class="n">predicted_class_id</span> <span class="o">=</span> <span class="n">logits</span><span class="o">.</span><span class="n">argmax</span><span class="p">()</span><span class="o">.</span><span class="n">item</span><span class="p">()</span>
7672
  <span class="c1"># get probabilities using softmax from logit score and convert it to numpy array</span>
7673
  <span class="n">probabilities_scores</span> <span class="o">=</span> <span class="n">F</span><span class="o">.</span><span class="n">softmax</span><span class="p">(</span><span class="n">logits</span><span class="p">,</span> <span class="n">dim</span> <span class="o">=</span> <span class="o">-</span><span class="mi">1</span><span class="p">)</span><span class="o">.</span><span class="n">numpy</span><span class="p">()[</span><span class="mi">0</span><span class="p">]</span>
7674
+ <span class="n">d</span><span class="o">=</span> <span class="p">{}</span>
7675
  <span class="k">for</span> <span class="n">i</span> <span class="ow">in</span> <span class="nb">range</span><span class="p">(</span><span class="mi">27</span><span class="p">):</span>
7676
+ <span class="c1"># print(f"P({id2label[i]}): {probabilities_scores[i]}")</span>
7677
+ <span class="c1"># d[f'P({id2label[i]})']= format(probabilities_scores[i], '.2f')</span>
7678
+ <span class="n">d</span><span class="p">[</span><span class="sa">f</span><span class="s1">'P(</span><span class="si">{</span><span class="n">id2label</span><span class="p">[</span><span class="n">i</span><span class="p">]</span><span class="si">}</span><span class="s1">)'</span><span class="p">]</span><span class="o">=</span> <span class="nb">round</span><span class="p">(</span><span class="n">probabilities_scores</span><span class="p">[</span><span class="n">i</span><span class="p">],</span> <span class="mi">3</span><span class="p">)</span>
7679
+
7680
+
7681
 
7682
+ <span class="nb">print</span><span class="p">(</span><span class="s2">"Predicted Class: "</span><span class="p">,</span> <span class="n">model</span><span class="o">.</span><span class="n">config</span><span class="o">.</span><span class="n">id2label</span><span class="p">[</span><span class="n">predicted_class_id</span><span class="p">],</span> <span class="sa">f</span><span class="s2">"</span><span class="se">\n</span><span class="s2">probabilities_scores: </span><span class="si">{</span><span class="n">probabilities_scores</span><span class="p">[</span><span class="n">predicted_class_id</span><span class="p">]</span><span class="si">}</span><span class="se">\n</span><span class="s2">"</span><span class="p">)</span>
7683
+ <span class="k">return</span> <span class="n">d</span>
7684
 
7685
 
7686
 
 
7710
  <div class="jp-OutputArea-child">
7711
  <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
7712
  <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
7713
+ <pre>Predicted Class: Computers_and_Electronics
7714
+ probabilities_scores: 0.9997648596763611
7715
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7716
  </pre>
7717
  </div>
7718
  </div>
7719
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
7720
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
7721
+ <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
7722
+ <pre>{'P(Hobbies_and_Leisure)': 0.0,
7723
+ 'P(News)': 0.0,
7724
+ 'P(Science)': 0.0,
7725
+ 'P(Autos_and_Vehicles)': 0.0,
7726
+ 'P(Health)': 0.0,
7727
+ 'P(Pets_and_Animals)': 0.0,
7728
+ 'P(Adult)': 0.0,
7729
+ 'P(Computers_and_Electronics)': 1.0,
7730
+ 'P(Online Communities)': 0.0,
7731
+ 'P(Beauty_and_Fitness)': 0.0,
7732
+ 'P(People_and_Society)': 0.0,
7733
+ 'P(Business_and_Industrial)': 0.0,
7734
+ 'P(Reference)': 0.0,
7735
+ 'P(Shopping)': 0.0,
7736
+ 'P(Travel_and_Transportation)': 0.0,
7737
+ 'P(Food_and_Drink)': 0.0,
7738
+ 'P(Law_and_Government)': 0.0,
7739
+ 'P(Books_and_Literature)': 0.0,
7740
+ 'P(Finance)': 0.0,
7741
+ 'P(Games)': 0.0,
7742
+ 'P(Home_and_Garden)': 0.0,
7743
+ 'P(Jobs_and_Education)': 0.0,
7744
+ 'P(Arts_and_Entertainment)': 0.0,
7745
+ 'P(Sensitive Subjects)': 0.0,
7746
+ 'P(Real Estate)': 0.0,
7747
+ 'P(Internet_and_Telecom)': 0.0,
7748
+ 'P(Sports)': 0.0}</pre>
7749
+ </div>
7750
+ </div>
7751
  </div>
7752
  </div>
7753
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
 
7771
  <div class="jp-OutputArea-child">
7772
  <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
7773
  <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
7774
+ <pre>Predicted Class: Food_and_Drink
7775
+ probabilities_scores: 0.9993139505386353
7776
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7777
  </pre>
7778
  </div>
7779
  </div>
7780
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
7781
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
7782
+ <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
7783
+ <pre>{'P(Hobbies_and_Leisure)': 0.0,
7784
+ 'P(News)': 0.0,
7785
+ 'P(Science)': 0.0,
7786
+ 'P(Autos_and_Vehicles)': 0.0,
7787
+ 'P(Health)': 0.0,
7788
+ 'P(Pets_and_Animals)': 0.0,
7789
+ 'P(Adult)': 0.0,
7790
+ 'P(Computers_and_Electronics)': 0.0,
7791
+ 'P(Online Communities)': 0.0,
7792
+ 'P(Beauty_and_Fitness)': 0.0,
7793
+ 'P(People_and_Society)': 0.0,
7794
+ 'P(Business_and_Industrial)': 0.0,
7795
+ 'P(Reference)': 0.0,
7796
+ 'P(Shopping)': 0.0,
7797
+ 'P(Travel_and_Transportation)': 0.0,
7798
+ 'P(Food_and_Drink)': 0.999,
7799
+ 'P(Law_and_Government)': 0.0,
7800
+ 'P(Books_and_Literature)': 0.0,
7801
+ 'P(Finance)': 0.0,
7802
+ 'P(Games)': 0.0,
7803
+ 'P(Home_and_Garden)': 0.0,
7804
+ 'P(Jobs_and_Education)': 0.0,
7805
+ 'P(Arts_and_Entertainment)': 0.0,
7806
+ 'P(Sensitive Subjects)': 0.0,
7807
+ 'P(Real Estate)': 0.0,
7808
+ 'P(Internet_and_Telecom)': 0.0,
7809
+ 'P(Sports)': 0.0}</pre>
7810
+ </div>
7811
+ </div>
7812
  </div>
7813
  </div>
7814
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
 
7832
  <div class="jp-OutputArea-child">
7833
  <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
7834
  <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
7835
+ <pre>Predicted Class: Food_and_Drink
7836
+ probabilities_scores: 0.9997541308403015
7837
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7838
  </pre>
7839
  </div>
7840
  </div>
7841
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
7842
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
7843
+ <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
7844
+ <pre>{'P(Hobbies_and_Leisure)': 0.0,
7845
+ 'P(News)': 0.0,
7846
+ 'P(Science)': 0.0,
7847
+ 'P(Autos_and_Vehicles)': 0.0,
7848
+ 'P(Health)': 0.0,
7849
+ 'P(Pets_and_Animals)': 0.0,
7850
+ 'P(Adult)': 0.0,
7851
+ 'P(Computers_and_Electronics)': 0.0,
7852
+ 'P(Online Communities)': 0.0,
7853
+ 'P(Beauty_and_Fitness)': 0.0,
7854
+ 'P(People_and_Society)': 0.0,
7855
+ 'P(Business_and_Industrial)': 0.0,
7856
+ 'P(Reference)': 0.0,
7857
+ 'P(Shopping)': 0.0,
7858
+ 'P(Travel_and_Transportation)': 0.0,
7859
+ 'P(Food_and_Drink)': 1.0,
7860
+ 'P(Law_and_Government)': 0.0,
7861
+ 'P(Books_and_Literature)': 0.0,
7862
+ 'P(Finance)': 0.0,
7863
+ 'P(Games)': 0.0,
7864
+ 'P(Home_and_Garden)': 0.0,
7865
+ 'P(Jobs_and_Education)': 0.0,
7866
+ 'P(Arts_and_Entertainment)': 0.0,
7867
+ 'P(Sensitive Subjects)': 0.0,
7868
+ 'P(Real Estate)': 0.0,
7869
+ 'P(Internet_and_Telecom)': 0.0,
7870
+ 'P(Sports)': 0.0}</pre>
7871
+ </div>
7872
+ </div>
7873
  </div>
7874
  </div>
7875
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
 
7893
  <div class="jp-OutputArea-child">
7894
  <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
7895
  <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
7896
+ <pre>Predicted Class: Food_and_Drink
7897
+ probabilities_scores: 0.9963496923446655
7898
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7899
  </pre>
7900
  </div>
7901
  </div>
7902
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
7903
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
7904
+ <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
7905
+ <pre>{'P(Hobbies_and_Leisure)': 0.0,
7906
+ 'P(News)': 0.0,
7907
+ 'P(Science)': 0.0,
7908
+ 'P(Autos_and_Vehicles)': 0.0,
7909
+ 'P(Health)': 0.0,
7910
+ 'P(Pets_and_Animals)': 0.002,
7911
+ 'P(Adult)': 0.0,
7912
+ 'P(Computers_and_Electronics)': 0.0,
7913
+ 'P(Online Communities)': 0.0,
7914
+ 'P(Beauty_and_Fitness)': 0.0,
7915
+ 'P(People_and_Society)': 0.0,
7916
+ 'P(Business_and_Industrial)': 0.0,
7917
+ 'P(Reference)': 0.0,
7918
+ 'P(Shopping)': 0.0,
7919
+ 'P(Travel_and_Transportation)': 0.0,
7920
+ 'P(Food_and_Drink)': 0.996,
7921
+ 'P(Law_and_Government)': 0.0,
7922
+ 'P(Books_and_Literature)': 0.0,
7923
+ 'P(Finance)': 0.0,
7924
+ 'P(Games)': 0.0,
7925
+ 'P(Home_and_Garden)': 0.0,
7926
+ 'P(Jobs_and_Education)': 0.0,
7927
+ 'P(Arts_and_Entertainment)': 0.0,
7928
+ 'P(Sensitive Subjects)': 0.0,
7929
+ 'P(Real Estate)': 0.0,
7930
+ 'P(Internet_and_Telecom)': 0.0,
7931
+ 'P(Sports)': 0.0}</pre>
7932
+ </div>
7933
+ </div>
7934
  </div>
7935
  </div>
7936
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
 
7954
  <div class="jp-OutputArea-child">
7955
  <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
7956
  <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
7957
+ <pre>Predicted Class: Computers_and_Electronics
7958
+ probabilities_scores: 0.999832034111023
7959
+
7960
+ </pre>
7961
+ </div>
7962
+ </div>
7963
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
7964
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
7965
+ <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
7966
+ <pre>{'P(Hobbies_and_Leisure)': 0.0,
7967
+ 'P(News)': 0.0,
7968
+ 'P(Science)': 0.0,
7969
+ 'P(Autos_and_Vehicles)': 0.0,
7970
+ 'P(Health)': 0.0,
7971
+ 'P(Pets_and_Animals)': 0.0,
7972
+ 'P(Adult)': 0.0,
7973
+ 'P(Computers_and_Electronics)': 1.0,
7974
+ 'P(Online Communities)': 0.0,
7975
+ 'P(Beauty_and_Fitness)': 0.0,
7976
+ 'P(People_and_Society)': 0.0,
7977
+ 'P(Business_and_Industrial)': 0.0,
7978
+ 'P(Reference)': 0.0,
7979
+ 'P(Shopping)': 0.0,
7980
+ 'P(Travel_and_Transportation)': 0.0,
7981
+ 'P(Food_and_Drink)': 0.0,
7982
+ 'P(Law_and_Government)': 0.0,
7983
+ 'P(Books_and_Literature)': 0.0,
7984
+ 'P(Finance)': 0.0,
7985
+ 'P(Games)': 0.0,
7986
+ 'P(Home_and_Garden)': 0.0,
7987
+ 'P(Jobs_and_Education)': 0.0,
7988
+ 'P(Arts_and_Entertainment)': 0.0,
7989
+ 'P(Sensitive Subjects)': 0.0,
7990
+ 'P(Real Estate)': 0.0,
7991
+ 'P(Internet_and_Telecom)': 0.0,
7992
+ 'P(Sports)': 0.0}</pre>
7993
+ </div>
7994
+ </div>
7995
+ </div>
7996
+ </div>
7997
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
7998
+ <div class="jp-Cell-inputWrapper" tabindex="0">
7999
+ <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
8000
+ </div>
8001
+ <div class="jp-InputArea jp-Cell-inputArea">
8002
+ <div class="jp-InputPrompt jp-InputArea-prompt">In [ ]:</div>
8003
+ <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
8004
+ <div class="cm-editor cm-s-jupyter">
8005
+ <div class="highlight hl-ipython3"><pre><span></span><span class="n">predict</span><span class="p">(</span><span class="s2">"apple "</span><span class="p">)</span>
8006
+ </pre></div>
8007
+ </div>
8008
+ </div>
8009
+ </div>
8010
+ </div>
8011
+ <div class="jp-Cell-outputWrapper">
8012
+ <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
8013
+ </div>
8014
+ <div class="jp-OutputArea jp-Cell-outputArea">
8015
+ <div class="jp-OutputArea-child">
8016
+ <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
8017
+ <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
8018
+ <pre>Predicted Class: Food_and_Drink
8019
+ probabilities_scores: 0.5473537445068359
8020
+
8021
  </pre>
8022
  </div>
8023
  </div>
8024
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
8025
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
8026
+ <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
8027
+ <pre>{'P(Hobbies_and_Leisure)': 0.0,
8028
+ 'P(News)': 0.0,
8029
+ 'P(Science)': 0.0,
8030
+ 'P(Autos_and_Vehicles)': 0.0,
8031
+ 'P(Health)': 0.0,
8032
+ 'P(Pets_and_Animals)': 0.0,
8033
+ 'P(Adult)': 0.0,
8034
+ 'P(Computers_and_Electronics)': 0.448,
8035
+ 'P(Online Communities)': 0.0,
8036
+ 'P(Beauty_and_Fitness)': 0.0,
8037
+ 'P(People_and_Society)': 0.0,
8038
+ 'P(Business_and_Industrial)': 0.0,
8039
+ 'P(Reference)': 0.0,
8040
+ 'P(Shopping)': 0.001,
8041
+ 'P(Travel_and_Transportation)': 0.0,
8042
+ 'P(Food_and_Drink)': 0.547,
8043
+ 'P(Law_and_Government)': 0.0,
8044
+ 'P(Books_and_Literature)': 0.0,
8045
+ 'P(Finance)': 0.0,
8046
+ 'P(Games)': 0.002,
8047
+ 'P(Home_and_Garden)': 0.0,
8048
+ 'P(Jobs_and_Education)': 0.0,
8049
+ 'P(Arts_and_Entertainment)': 0.0,
8050
+ 'P(Sensitive Subjects)': 0.0,
8051
+ 'P(Real Estate)': 0.0,
8052
+ 'P(Internet_and_Telecom)': 0.0,
8053
+ 'P(Sports)': 0.0}</pre>
8054
+ </div>
8055
+ </div>
8056
+ </div>
8057
+ </div>
8058
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
8059
+ <div class="jp-Cell-inputWrapper" tabindex="0">
8060
+ <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
8061
+ </div>
8062
+ <div class="jp-InputArea jp-Cell-inputArea">
8063
+ <div class="jp-InputPrompt jp-InputArea-prompt">In [ ]:</div>
8064
+ <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
8065
+ <div class="cm-editor cm-s-jupyter">
8066
+ <div class="highlight hl-ipython3"><pre><span></span><span class="n">predict</span><span class="p">(</span><span class="s1">'apple iphone'</span><span class="p">)</span>
8067
+ </pre></div>
8068
+ </div>
8069
+ </div>
8070
+ </div>
8071
+ </div>
8072
+ <div class="jp-Cell-outputWrapper">
8073
+ <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
8074
+ </div>
8075
+ <div class="jp-OutputArea jp-Cell-outputArea">
8076
+ <div class="jp-OutputArea-child">
8077
+ <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
8078
+ <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
8079
+ <pre>Predicted Class: Computers_and_Electronics
8080
+ probabilities_scores: 0.9997270703315735
8081
+
8082
+ </pre>
8083
+ </div>
8084
+ </div>
8085
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
8086
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
8087
+ <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
8088
+ <pre>{'P(Hobbies_and_Leisure)': 0.0,
8089
+ 'P(News)': 0.0,
8090
+ 'P(Science)': 0.0,
8091
+ 'P(Autos_and_Vehicles)': 0.0,
8092
+ 'P(Health)': 0.0,
8093
+ 'P(Pets_and_Animals)': 0.0,
8094
+ 'P(Adult)': 0.0,
8095
+ 'P(Computers_and_Electronics)': 1.0,
8096
+ 'P(Online Communities)': 0.0,
8097
+ 'P(Beauty_and_Fitness)': 0.0,
8098
+ 'P(People_and_Society)': 0.0,
8099
+ 'P(Business_and_Industrial)': 0.0,
8100
+ 'P(Reference)': 0.0,
8101
+ 'P(Shopping)': 0.0,
8102
+ 'P(Travel_and_Transportation)': 0.0,
8103
+ 'P(Food_and_Drink)': 0.0,
8104
+ 'P(Law_and_Government)': 0.0,
8105
+ 'P(Books_and_Literature)': 0.0,
8106
+ 'P(Finance)': 0.0,
8107
+ 'P(Games)': 0.0,
8108
+ 'P(Home_and_Garden)': 0.0,
8109
+ 'P(Jobs_and_Education)': 0.0,
8110
+ 'P(Arts_and_Entertainment)': 0.0,
8111
+ 'P(Sensitive Subjects)': 0.0,
8112
+ 'P(Real Estate)': 0.0,
8113
+ 'P(Internet_and_Telecom)': 0.0,
8114
+ 'P(Sports)': 0.0}</pre>
8115
+ </div>
8116
+ </div>
8117
+ </div>
8118
+ </div>
8119
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
8120
+ <div class="jp-Cell-inputWrapper" tabindex="0">
8121
+ <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
8122
+ </div>
8123
+ <div class="jp-InputArea jp-Cell-inputArea">
8124
+ <div class="jp-InputPrompt jp-InputArea-prompt">In [ ]:</div>
8125
+ <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
8126
+ <div class="cm-editor cm-s-jupyter">
8127
+ <div class="highlight hl-ipython3"><pre><span></span><span class="n">predict</span><span class="p">(</span>
8128
+ <span class="s1">'razer kraken'</span>
8129
+ <span class="p">)</span>
8130
+ </pre></div>
8131
+ </div>
8132
+ </div>
8133
+ </div>
8134
+ </div>
8135
+ <div class="jp-Cell-outputWrapper">
8136
+ <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
8137
+ </div>
8138
+ <div class="jp-OutputArea jp-Cell-outputArea">
8139
+ <div class="jp-OutputArea-child">
8140
+ <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
8141
+ <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
8142
+ <pre>Predicted Class: Computers_and_Electronics
8143
+ probabilities_scores: 0.9997072815895081
8144
+
8145
+ </pre>
8146
+ </div>
8147
+ </div>
8148
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
8149
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
8150
+ <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
8151
+ <pre>{'P(Hobbies_and_Leisure)': 0.0,
8152
+ 'P(News)': 0.0,
8153
+ 'P(Science)': 0.0,
8154
+ 'P(Autos_and_Vehicles)': 0.0,
8155
+ 'P(Health)': 0.0,
8156
+ 'P(Pets_and_Animals)': 0.0,
8157
+ 'P(Adult)': 0.0,
8158
+ 'P(Computers_and_Electronics)': 1.0,
8159
+ 'P(Online Communities)': 0.0,
8160
+ 'P(Beauty_and_Fitness)': 0.0,
8161
+ 'P(People_and_Society)': 0.0,
8162
+ 'P(Business_and_Industrial)': 0.0,
8163
+ 'P(Reference)': 0.0,
8164
+ 'P(Shopping)': 0.0,
8165
+ 'P(Travel_and_Transportation)': 0.0,
8166
+ 'P(Food_and_Drink)': 0.0,
8167
+ 'P(Law_and_Government)': 0.0,
8168
+ 'P(Books_and_Literature)': 0.0,
8169
+ 'P(Finance)': 0.0,
8170
+ 'P(Games)': 0.0,
8171
+ 'P(Home_and_Garden)': 0.0,
8172
+ 'P(Jobs_and_Education)': 0.0,
8173
+ 'P(Arts_and_Entertainment)': 0.0,
8174
+ 'P(Sensitive Subjects)': 0.0,
8175
+ 'P(Real Estate)': 0.0,
8176
+ 'P(Internet_and_Telecom)': 0.0,
8177
+ 'P(Sports)': 0.0}</pre>
8178
+ </div>
8179
+ </div>
8180
+ </div>
8181
+ </div>
8182
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
8183
+ <div class="jp-Cell-inputWrapper" tabindex="0">
8184
+ <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
8185
+ </div>
8186
+ <div class="jp-InputArea jp-Cell-inputArea">
8187
+ <div class="jp-InputPrompt jp-InputArea-prompt">In [ ]:</div>
8188
+ <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
8189
+ <div class="cm-editor cm-s-jupyter">
8190
+ <div class="highlight hl-ipython3"><pre><span></span><span class="n">predict</span><span class="p">(</span><span class="s2">"facebook"</span><span class="p">)</span>
8191
+ </pre></div>
8192
+ </div>
8193
+ </div>
8194
+ </div>
8195
+ </div>
8196
+ <div class="jp-Cell-outputWrapper">
8197
+ <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
8198
+ </div>
8199
+ <div class="jp-OutputArea jp-Cell-outputArea">
8200
+ <div class="jp-OutputArea-child">
8201
+ <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
8202
+ <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
8203
+ <pre>Predicted Class: Online Communities
8204
+ probabilities_scores: 0.997126042842865
8205
+
8206
+ </pre>
8207
+ </div>
8208
+ </div>
8209
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
8210
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
8211
+ <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
8212
+ <pre>{'P(Hobbies_and_Leisure)': 0.0,
8213
+ 'P(News)': 0.0,
8214
+ 'P(Science)': 0.0,
8215
+ 'P(Autos_and_Vehicles)': 0.0,
8216
+ 'P(Health)': 0.0,
8217
+ 'P(Pets_and_Animals)': 0.0,
8218
+ 'P(Adult)': 0.0,
8219
+ 'P(Computers_and_Electronics)': 0.001,
8220
+ 'P(Online Communities)': 0.997,
8221
+ 'P(Beauty_and_Fitness)': 0.0,
8222
+ 'P(People_and_Society)': 0.0,
8223
+ 'P(Business_and_Industrial)': 0.0,
8224
+ 'P(Reference)': 0.0,
8225
+ 'P(Shopping)': 0.0,
8226
+ 'P(Travel_and_Transportation)': 0.0,
8227
+ 'P(Food_and_Drink)': 0.0,
8228
+ 'P(Law_and_Government)': 0.0,
8229
+ 'P(Books_and_Literature)': 0.0,
8230
+ 'P(Finance)': 0.0,
8231
+ 'P(Games)': 0.0,
8232
+ 'P(Home_and_Garden)': 0.001,
8233
+ 'P(Jobs_and_Education)': 0.0,
8234
+ 'P(Arts_and_Entertainment)': 0.0,
8235
+ 'P(Sensitive Subjects)': 0.0,
8236
+ 'P(Real Estate)': 0.0,
8237
+ 'P(Internet_and_Telecom)': 0.0,
8238
+ 'P(Sports)': 0.0}</pre>
8239
+ </div>
8240
+ </div>
8241
+ </div>
8242
+ </div>
8243
+ </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell">
8244
+ <div class="jp-Cell-inputWrapper" tabindex="0">
8245
+ <div class="jp-Collapser jp-InputCollapser jp-Cell-inputCollapser">
8246
+ </div>
8247
+ <div class="jp-InputArea jp-Cell-inputArea">
8248
+ <div class="jp-InputPrompt jp-InputArea-prompt">In [ ]:</div>
8249
+ <div class="jp-CodeMirrorEditor jp-Editor jp-InputArea-editor" data-type="inline">
8250
+ <div class="cm-editor cm-s-jupyter">
8251
+ <div class="highlight hl-ipython3"><pre><span></span><span class="n">predict</span><span class="p">(</span><span class="s1">'apple iphone'</span><span class="p">)</span>
8252
+ </pre></div>
8253
+ </div>
8254
+ </div>
8255
+ </div>
8256
+ </div>
8257
+ <div class="jp-Cell-outputWrapper">
8258
+ <div class="jp-Collapser jp-OutputCollapser jp-Cell-outputCollapser">
8259
+ </div>
8260
+ <div class="jp-OutputArea jp-Cell-outputArea">
8261
+ <div class="jp-OutputArea-child">
8262
+ <div class="jp-OutputPrompt jp-OutputArea-prompt"></div>
8263
+ <div class="jp-RenderedText jp-OutputArea-output" data-mime-type="text/plain" tabindex="0">
8264
+ <pre>Predicted Class: Computers_and_Electronics
8265
+ probabilities_scores: 0.9997270703315735
8266
+
8267
+ </pre>
8268
+ </div>
8269
+ </div>
8270
+ <div class="jp-OutputArea-child jp-OutputArea-executeResult">
8271
+ <div class="jp-OutputPrompt jp-OutputArea-prompt">Out[ ]:</div>
8272
+ <div class="jp-RenderedText jp-OutputArea-output jp-OutputArea-executeResult" data-mime-type="text/plain" tabindex="0">
8273
+ <pre>{'P(Hobbies_and_Leisure)': 0.0,
8274
+ 'P(News)': 0.0,
8275
+ 'P(Science)': 0.0,
8276
+ 'P(Autos_and_Vehicles)': 0.0,
8277
+ 'P(Health)': 0.0,
8278
+ 'P(Pets_and_Animals)': 0.0,
8279
+ 'P(Adult)': 0.0,
8280
+ 'P(Computers_and_Electronics)': 1.0,
8281
+ 'P(Online Communities)': 0.0,
8282
+ 'P(Beauty_and_Fitness)': 0.0,
8283
+ 'P(People_and_Society)': 0.0,
8284
+ 'P(Business_and_Industrial)': 0.0,
8285
+ 'P(Reference)': 0.0,
8286
+ 'P(Shopping)': 0.0,
8287
+ 'P(Travel_and_Transportation)': 0.0,
8288
+ 'P(Food_and_Drink)': 0.0,
8289
+ 'P(Law_and_Government)': 0.0,
8290
+ 'P(Books_and_Literature)': 0.0,
8291
+ 'P(Finance)': 0.0,
8292
+ 'P(Games)': 0.0,
8293
+ 'P(Home_and_Garden)': 0.0,
8294
+ 'P(Jobs_and_Education)': 0.0,
8295
+ 'P(Arts_and_Entertainment)': 0.0,
8296
+ 'P(Sensitive Subjects)': 0.0,
8297
+ 'P(Real Estate)': 0.0,
8298
+ 'P(Internet_and_Telecom)': 0.0,
8299
+ 'P(Sports)': 0.0}</pre>
8300
+ </div>
8301
+ </div>
8302
  </div>
8303
  </div>
8304
  </div><div class="jp-Cell jp-CodeCell jp-Notebook-cell jp-mod-noOutputs">
research/09_inference.ipynb CHANGED
@@ -98,9 +98,17 @@
98
  },
99
  {
100
  "cell_type": "code",
101
- "execution_count": 4,
102
  "metadata": {},
103
  "outputs": [
 
 
 
 
 
 
 
 
104
  {
105
  "name": "stderr",
106
  "output_type": "stream",
@@ -114,9 +122,11 @@
114
  "from transformers import AutoModelForSequenceClassification\n",
115
  "import torch\n",
116
  "from torch.nn import functional as F\n",
 
117
  "\n",
118
  "\n",
119
- "model_name= \"finetuned_entity_categorical_classification/checkpoint-3184\"\n",
 
120
  "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
121
  "\n",
122
  "model = AutoModelForSequenceClassification.from_pretrained(model_name)\n"
@@ -124,10 +134,53 @@
124
  },
125
  {
126
  "cell_type": "code",
127
- "execution_count": 5,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  "metadata": {},
129
  "outputs": [],
130
  "source": [
 
 
 
 
 
 
 
 
131
  "\n",
132
  "\n",
133
  "def predict(sentence: str):\n",
@@ -140,16 +193,30 @@
140
  " \n",
141
  " # print(\"logits: \", logits)\n",
142
  " predicted_class_id = logits.argmax().item()\n",
 
143
  " # get probabilities using softmax from logit score and convert it to numpy array\n",
144
  " probabilities_scores = F.softmax(logits, dim = -1).numpy()[0]\n",
 
 
 
145
  " d= {}\n",
 
 
146
  " for i in range(27):\n",
147
  " # print(f\"P({id2label[i]}): {probabilities_scores[i]}\")\n",
148
- " d[f'P({id2label[i]})']= format(probabilities_scores[i], '.2f')\n",
 
 
 
 
 
 
 
 
149
  " \n",
150
  "\n",
151
- " print(\"Predicted Class: \", model.config.id2label[predicted_class_id], f\"probabilities_scores: {probabilities_scores[predicted_class_id]}\")\n",
152
- " return d\n",
153
  " \n",
154
  " \n",
155
  " "
@@ -157,42 +224,53 @@
157
  },
158
  {
159
  "cell_type": "code",
160
- "execution_count": 6,
161
  "metadata": {},
162
  "outputs": [
163
  {
164
  "name": "stdout",
165
  "output_type": "stream",
166
  "text": [
167
- "P(Beauty_and_Fitness): 1.0167686014028732e-05\n",
168
- "P(People_and_Society): 1.406734668307763e-06\n",
169
- "P(Travel_and_Transportation): 9.111173540077289e-07\n",
170
- "P(Shopping): 2.7279720598016866e-05\n",
171
- "P(Adult): 2.7205089736526133e-06\n",
172
- "P(Sports): 2.7785404199676123e-06\n",
173
- "P(Science): 9.693985703052022e-07\n",
174
- "P(Food_and_Drink): 5.907952072448097e-06\n",
175
- "P(News): 8.620731023256667e-06\n",
176
- "P(Sensitive Subjects): 2.1766395548183937e-06\n",
177
- "P(Autos_and_Vehicles): 3.173354627961089e-07\n",
178
- "P(Law_and_Government): 1.089682882593479e-06\n",
179
- "P(Business_and_Industrial): 2.0000404674647143e-06\n",
180
- "P(Health): 8.528571925126016e-06\n",
181
- "P(Real Estate): 6.72997032324929e-07\n",
182
- "P(Books_and_Literature): 1.7418132074453752e-06\n",
183
- "P(Computers_and_Electronics): 0.9998340606689453\n",
184
- "P(Internet_and_Telecom): 4.2605301132425666e-05\n",
185
- "P(Home_and_Garden): 7.0778082772449125e-06\n",
186
- "P(Jobs_and_Education): 3.205217353752232e-07\n",
187
- "P(Online Communities): 7.534316409874009e-06\n",
188
- "P(Finance): 3.597612248995574e-06\n",
189
- "P(Arts_and_Entertainment): 1.5469729532924248e-06\n",
190
- "P(Games): 2.201926508860197e-05\n",
191
- "P(Hobbies_and_Leisure): 2.3530192265752703e-06\n",
192
- "P(Reference): 2.341075600043041e-08\n",
193
- "P(Pets_and_Animals): 1.5077214357006596e-06\n",
194
- "Predicted Class: Computers_and_Electronics probabilities_scores: 0.9998340606689453\n"
195
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
  }
197
  ],
198
  "source": [
@@ -201,42 +279,53 @@
201
  },
202
  {
203
  "cell_type": "code",
204
- "execution_count": 7,
205
  "metadata": {},
206
  "outputs": [
207
  {
208
  "name": "stdout",
209
  "output_type": "stream",
210
  "text": [
211
- "P(Beauty_and_Fitness): 0.0002981989237014204\n",
212
- "P(People_and_Society): 1.8243508748128079e-06\n",
213
- "P(Travel_and_Transportation): 1.4317002751340624e-05\n",
214
- "P(Shopping): 9.405774108017795e-06\n",
215
- "P(Adult): 1.2231478194735246e-06\n",
216
- "P(Sports): 6.019924967404222e-06\n",
217
- "P(Science): 7.067929800541606e-06\n",
218
- "P(Food_and_Drink): 0.9972833395004272\n",
219
- "P(News): 0.00014127693430054933\n",
220
- "P(Sensitive Subjects): 2.4317660063388757e-06\n",
221
- "P(Autos_and_Vehicles): 5.870697918908263e-07\n",
222
- "P(Law_and_Government): 3.3484843697806355e-06\n",
223
- "P(Business_and_Industrial): 5.084546046418836e-06\n",
224
- "P(Health): 0.0021307284478098154\n",
225
- "P(Real Estate): 1.483008531977248e-06\n",
226
- "P(Books_and_Literature): 2.4371431663894327e-06\n",
227
- "P(Computers_and_Electronics): 1.0735298928921111e-05\n",
228
- "P(Internet_and_Telecom): 2.851840008588624e-06\n",
229
- "P(Home_and_Garden): 2.7712192149920156e-06\n",
230
- "P(Jobs_and_Education): 1.1146977158205118e-05\n",
231
- "P(Online Communities): 7.0186338234634604e-06\n",
232
- "P(Finance): 5.121751655678963e-06\n",
233
- "P(Arts_and_Entertainment): 8.403771062148735e-06\n",
234
- "P(Games): 2.9928612548246747e-06\n",
235
- "P(Hobbies_and_Leisure): 3.484110129647888e-05\n",
236
- "P(Reference): 6.697590748672155e-08\n",
237
- "P(Pets_and_Animals): 5.252794835541863e-06\n",
238
- "Predicted Class: Food_and_Drink probabilities_scores: 0.9972833395004272\n"
239
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
  }
241
  ],
242
  "source": [
@@ -245,42 +334,53 @@
245
  },
246
  {
247
  "cell_type": "code",
248
- "execution_count": 8,
249
  "metadata": {},
250
  "outputs": [
251
  {
252
  "name": "stdout",
253
  "output_type": "stream",
254
  "text": [
255
- "P(Beauty_and_Fitness): 2.6114428692380898e-05\n",
256
- "P(People_and_Society): 6.279856279434171e-07\n",
257
- "P(Travel_and_Transportation): 6.017768100718968e-06\n",
258
- "P(Shopping): 6.115729320299579e-06\n",
259
- "P(Adult): 4.621779794433678e-07\n",
260
- "P(Sports): 8.989664479486237e-07\n",
261
- "P(Science): 4.8601555135974195e-06\n",
262
- "P(Food_and_Drink): 0.9997175335884094\n",
263
- "P(News): 0.00015670375432819128\n",
264
- "P(Sensitive Subjects): 5.142674694980087e-07\n",
265
- "P(Autos_and_Vehicles): 2.1764762436760066e-07\n",
266
- "P(Law_and_Government): 1.2030991456413176e-06\n",
267
- "P(Business_and_Industrial): 1.6263313682429725e-06\n",
268
- "P(Health): 4.478434129850939e-05\n",
269
- "P(Real Estate): 6.337517106658197e-07\n",
270
- "P(Books_and_Literature): 1.2728096407954581e-06\n",
271
- "P(Computers_and_Electronics): 2.8549591206683544e-06\n",
272
- "P(Internet_and_Telecom): 1.3799519820167916e-06\n",
273
- "P(Home_and_Garden): 2.937797489721561e-06\n",
274
- "P(Jobs_and_Education): 4.768957296619192e-06\n",
275
- "P(Online Communities): 2.587612470961176e-06\n",
276
- "P(Finance): 1.5463368754353723e-06\n",
277
- "P(Arts_and_Entertainment): 6.821313945692964e-06\n",
278
- "P(Games): 7.65006177516625e-07\n",
279
- "P(Hobbies_and_Leisure): 4.179368261247873e-06\n",
280
- "P(Reference): 3.270602633165254e-08\n",
281
- "P(Pets_and_Animals): 2.580756472525536e-06\n",
282
- "Predicted Class: Food_and_Drink probabilities_scores: 0.9997175335884094\n"
283
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
284
  }
285
  ],
286
  "source": [
@@ -289,42 +389,53 @@
289
  },
290
  {
291
  "cell_type": "code",
292
- "execution_count": 9,
293
  "metadata": {},
294
  "outputs": [
295
  {
296
  "name": "stdout",
297
  "output_type": "stream",
298
  "text": [
299
- "P(Beauty_and_Fitness): 6.976195891184034e-06\n",
300
- "P(People_and_Society): 1.2303950143177644e-06\n",
301
- "P(Travel_and_Transportation): 1.7862849972516415e-06\n",
302
- "P(Shopping): 5.573031558014918e-06\n",
303
- "P(Adult): 3.2791076591820456e-06\n",
304
- "P(Sports): 5.794179287477164e-06\n",
305
- "P(Science): 8.48299987410428e-06\n",
306
- "P(Food_and_Drink): 0.0005717862513847649\n",
307
- "P(News): 1.0014691724791192e-05\n",
308
- "P(Sensitive Subjects): 2.9312270726222778e-06\n",
309
- "P(Autos_and_Vehicles): 1.5730682889625314e-07\n",
310
- "P(Law_and_Government): 1.0351266155339545e-06\n",
311
- "P(Business_and_Industrial): 1.9998137759102974e-06\n",
312
- "P(Health): 5.863273599970853e-06\n",
313
- "P(Real Estate): 2.589280256870552e-07\n",
314
- "P(Books_and_Literature): 3.1806489459995646e-06\n",
315
- "P(Computers_and_Electronics): 1.6475665688631125e-05\n",
316
- "P(Internet_and_Telecom): 1.3075596143607982e-06\n",
317
- "P(Home_and_Garden): 1.027156031341292e-05\n",
318
- "P(Jobs_and_Education): 1.03862419109646e-06\n",
319
- "P(Online Communities): 4.737964445666876e-06\n",
320
- "P(Finance): 2.0996037619624985e-06\n",
321
- "P(Arts_and_Entertainment): 4.993361471861135e-06\n",
322
- "P(Games): 4.1619005060056224e-06\n",
323
- "P(Hobbies_and_Leisure): 1.088273165805731e-05\n",
324
- "P(Reference): 6.112716022244058e-08\n",
325
- "P(Pets_and_Animals): 0.9993135929107666\n",
326
- "Predicted Class: Pets_and_Animals probabilities_scores: 0.9993135929107666\n"
327
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
328
  }
329
  ],
330
  "source": [
@@ -333,42 +444,53 @@
333
  },
334
  {
335
  "cell_type": "code",
336
- "execution_count": 10,
337
  "metadata": {},
338
  "outputs": [
339
  {
340
  "name": "stdout",
341
  "output_type": "stream",
342
  "text": [
343
- "P(Beauty_and_Fitness): 1.0418082638352644e-05\n",
344
- "P(People_and_Society): 1.198376025968173e-06\n",
345
- "P(Travel_and_Transportation): 5.249040100352431e-07\n",
346
- "P(Shopping): 1.6788271750556305e-05\n",
347
- "P(Adult): 2.3851741843827767e-06\n",
348
- "P(Sports): 1.8478541505828616e-06\n",
349
- "P(Science): 8.450400628134958e-07\n",
350
- "P(Food_and_Drink): 3.6571536838891916e-06\n",
351
- "P(News): 4.5494271034840494e-06\n",
352
- "P(Sensitive Subjects): 2.1925256987742614e-06\n",
353
- "P(Autos_and_Vehicles): 2.598584387669689e-07\n",
354
- "P(Law_and_Government): 9.124052553488582e-07\n",
355
- "P(Business_and_Industrial): 1.343827193522884e-06\n",
356
- "P(Health): 7.631779226358049e-06\n",
357
- "P(Real Estate): 4.913577527076995e-07\n",
358
- "P(Books_and_Literature): 1.6118407302201376e-06\n",
359
- "P(Computers_and_Electronics): 0.9998828172683716\n",
360
- "P(Internet_and_Telecom): 2.9297894798219204e-05\n",
361
- "P(Home_and_Garden): 5.192091521166731e-06\n",
362
- "P(Jobs_and_Education): 2.745251777014346e-07\n",
363
- "P(Online Communities): 6.218880571395857e-06\n",
364
- "P(Finance): 3.290834229119355e-06\n",
365
- "P(Arts_and_Entertainment): 1.541877054478391e-06\n",
366
- "P(Games): 1.1492516023281496e-05\n",
367
- "P(Hobbies_and_Leisure): 1.9986127881566063e-06\n",
368
- "P(Reference): 1.8265923884541735e-08\n",
369
- "P(Pets_and_Animals): 1.1247184374951757e-06\n",
370
- "Predicted Class: Computers_and_Electronics probabilities_scores: 0.9998828172683716\n"
371
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
372
  }
373
  ],
374
  "source": [
@@ -377,42 +499,53 @@
377
  },
378
  {
379
  "cell_type": "code",
380
- "execution_count": 11,
381
  "metadata": {},
382
  "outputs": [
383
  {
384
  "name": "stdout",
385
  "output_type": "stream",
386
  "text": [
387
- "P(Beauty_and_Fitness): 1.086300744645996e-05\n",
388
- "P(People_and_Society): 2.385743300692411e-07\n",
389
- "P(Travel_and_Transportation): 1.9932767827413045e-06\n",
390
- "P(Shopping): 4.334059667598922e-06\n",
391
- "P(Adult): 3.253454110563325e-07\n",
392
- "P(Sports): 8.683252303853806e-07\n",
393
- "P(Science): 2.3967959350557067e-06\n",
394
- "P(Food_and_Drink): 0.9998577833175659\n",
395
- "P(News): 5.469225288834423e-05\n",
396
- "P(Sensitive Subjects): 3.331420828089904e-07\n",
397
- "P(Autos_and_Vehicles): 1.0676290429501023e-07\n",
398
- "P(Law_and_Government): 4.7278643933168496e-07\n",
399
- "P(Business_and_Industrial): 1.5407667888212018e-06\n",
400
- "P(Health): 4.193164568278007e-05\n",
401
- "P(Real Estate): 3.750056123408285e-07\n",
402
- "P(Books_and_Literature): 4.987622901353461e-07\n",
403
- "P(Computers_and_Electronics): 3.906153779098531e-06\n",
404
- "P(Internet_and_Telecom): 8.262347819254501e-07\n",
405
- "P(Home_and_Garden): 1.5766403294037445e-06\n",
406
- "P(Jobs_and_Education): 4.150041149841854e-06\n",
407
- "P(Online Communities): 2.0979061901016394e-06\n",
408
- "P(Finance): 1.1580733598748338e-06\n",
409
- "P(Arts_and_Entertainment): 2.0028785456815967e-06\n",
410
- "P(Games): 9.470307986703119e-07\n",
411
- "P(Hobbies_and_Leisure): 2.5496683520032093e-06\n",
412
- "P(Reference): 1.3998636916312535e-08\n",
413
- "P(Pets_and_Animals): 1.9844153484882554e-06\n",
414
- "Predicted Class: Food_and_Drink probabilities_scores: 0.9998577833175659\n"
415
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
416
  }
417
  ],
418
  "source": [
@@ -421,42 +554,53 @@
421
  },
422
  {
423
  "cell_type": "code",
424
- "execution_count": 12,
425
  "metadata": {},
426
  "outputs": [
427
  {
428
  "name": "stdout",
429
  "output_type": "stream",
430
  "text": [
431
- "P(Beauty_and_Fitness): 0.00013269745977595448\n",
432
- "P(People_and_Society): 4.455394901015097e-06\n",
433
- "P(Travel_and_Transportation): 2.5948824259103276e-05\n",
434
- "P(Shopping): 0.0005248919478617609\n",
435
- "P(Adult): 1.7862246750155464e-05\n",
436
- "P(Sports): 1.6017889720387757e-05\n",
437
- "P(Science): 2.5951496354537085e-05\n",
438
- "P(Food_and_Drink): 0.9478479623794556\n",
439
- "P(News): 0.0002582172746770084\n",
440
- "P(Sensitive Subjects): 1.79517828655662e-05\n",
441
- "P(Autos_and_Vehicles): 4.965268544765422e-06\n",
442
- "P(Law_and_Government): 7.921374162833672e-06\n",
443
- "P(Business_and_Industrial): 0.0001139482410508208\n",
444
- "P(Health): 0.0005791003350168467\n",
445
- "P(Real Estate): 6.392176146619022e-06\n",
446
- "P(Books_and_Literature): 2.4286606276291423e-05\n",
447
- "P(Computers_and_Electronics): 0.049869947135448456\n",
448
- "P(Internet_and_Telecom): 9.170828707283363e-05\n",
449
- "P(Home_and_Garden): 9.513090481050313e-05\n",
450
- "P(Jobs_and_Education): 3.3369826269336045e-05\n",
451
- "P(Online Communities): 8.171715307980776e-05\n",
452
- "P(Finance): 3.625190947786905e-05\n",
453
- "P(Arts_and_Entertainment): 2.533747101551853e-05\n",
454
- "P(Games): 8.59149222378619e-05\n",
455
- "P(Hobbies_and_Leisure): 2.0291698092478327e-05\n",
456
- "P(Reference): 1.9418187946484977e-07\n",
457
- "P(Pets_and_Animals): 5.1680701290024444e-05\n",
458
- "Predicted Class: Food_and_Drink probabilities_scores: 0.9478479623794556\n"
459
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
  }
461
  ],
462
  "source": [
@@ -465,22 +609,468 @@
465
  },
466
  {
467
  "cell_type": "code",
468
- "execution_count": null,
469
  "metadata": {},
470
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
471
  "source": [
472
  "predict(\n",
473
  " 'razer kraken'\n",
474
  ")"
475
  ]
476
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
477
  {
478
  "cell_type": "code",
479
  "execution_count": null,
480
  "metadata": {},
481
  "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
482
  "source": [
483
- "predict(\"facebook\")"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
484
  ]
485
  },
486
  {
 
98
  },
99
  {
100
  "cell_type": "code",
101
+ "execution_count": 3,
102
  "metadata": {},
103
  "outputs": [
104
+ {
105
+ "name": "stderr",
106
+ "output_type": "stream",
107
+ "text": [
108
+ "/home/ubuntu/SentenceStructureComparision/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
109
+ " from .autonotebook import tqdm as notebook_tqdm\n"
110
+ ]
111
+ },
112
  {
113
  "name": "stderr",
114
  "output_type": "stream",
 
122
  "from transformers import AutoModelForSequenceClassification\n",
123
  "import torch\n",
124
  "from torch.nn import functional as F\n",
125
+ "import numpy as np\n",
126
  "\n",
127
  "\n",
128
+ "\n",
129
+ "model_name= \"finetuned_entity_categorical_classification/checkpoint-3212\"\n",
130
  "tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
131
  "\n",
132
  "model = AutoModelForSequenceClassification.from_pretrained(model_name)\n"
 
134
  },
135
  {
136
  "cell_type": "code",
137
+ "execution_count": null,
138
+ "metadata": {},
139
+ "outputs": [],
140
+ "source": []
141
+ },
142
+ {
143
+ "cell_type": "code",
144
+ "execution_count": null,
145
+ "metadata": {},
146
+ "outputs": [],
147
+ "source": []
148
+ },
149
+ {
150
+ "cell_type": "code",
151
+ "execution_count": null,
152
+ "metadata": {},
153
+ "outputs": [],
154
+ "source": []
155
+ },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": null,
159
+ "metadata": {},
160
+ "outputs": [],
161
+ "source": []
162
+ },
163
+ {
164
+ "cell_type": "code",
165
+ "execution_count": null,
166
+ "metadata": {},
167
+ "outputs": [],
168
+ "source": []
169
+ },
170
+ {
171
+ "cell_type": "code",
172
+ "execution_count": 50,
173
  "metadata": {},
174
  "outputs": [],
175
  "source": [
176
+ "# probabilities = 1 / (1 + np.exp(-logit_score))\n",
177
+ "def logit2prob(logit):\n",
178
+ " # odds =np.exp(logit)\n",
179
+ " # prob = odds / (1 + odds)\n",
180
+ " prob= 1/(1+ np.exp(-logit))\n",
181
+ " return np.round(prob, 3)\n",
182
+ "\n",
183
+ "\n",
184
  "\n",
185
  "\n",
186
  "def predict(sentence: str):\n",
 
193
  " \n",
194
  " # print(\"logits: \", logits)\n",
195
  " predicted_class_id = logits.argmax().item()\n",
196
+ " \n",
197
  " # get probabilities using softmax from logit score and convert it to numpy array\n",
198
  " probabilities_scores = F.softmax(logits, dim = -1).numpy()[0]\n",
199
+ " individual_probabilities_scores = logit2prob(logits.numpy()[0])\n",
200
+ " \n",
201
+ " \n",
202
  " d= {}\n",
203
+ " d_ind= {}\n",
204
+ " # d_ind= {}\n",
205
  " for i in range(27):\n",
206
  " # print(f\"P({id2label[i]}): {probabilities_scores[i]}\")\n",
207
+ " # d[f'P({id2label[i]})']= format(probabilities_scores[i], '.2f')\n",
208
+ " d[f'P({id2label[i]})']= round(probabilities_scores[i], 3)\n",
209
+ " \n",
210
+ " \n",
211
+ " for i in range(27):\n",
212
+ " # print(f\"P({id2label[i]}): {probabilities_scores[i]}\")\n",
213
+ " # d[f'P({id2label[i]})']= format(probabilities_scores[i], '.2f')\n",
214
+ " d_ind[f'P({id2label[i]})']= (individual_probabilities_scores[i])\n",
215
+ " \n",
216
  " \n",
217
  "\n",
218
+ " print(\"Predicted Class: \", model.config.id2label[predicted_class_id], f\"\\nprobabilities_scores: {individual_probabilities_scores[predicted_class_id]}\\n\")\n",
219
+ " return d_ind\n",
220
  " \n",
221
  " \n",
222
  " "
 
224
  },
225
  {
226
  "cell_type": "code",
227
+ "execution_count": 51,
228
  "metadata": {},
229
  "outputs": [
230
  {
231
  "name": "stdout",
232
  "output_type": "stream",
233
  "text": [
234
+ "Predicted Class: Computers_and_Electronics \n",
235
+ "probabilities_scores: 1.0\n",
236
+ "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
  ]
238
+ },
239
+ {
240
+ "data": {
241
+ "text/plain": [
242
+ "{'P(Hobbies_and_Leisure)': 0.107,\n",
243
+ " 'P(News)': 0.003,\n",
244
+ " 'P(Science)': 0.028,\n",
245
+ " 'P(Autos_and_Vehicles)': 0.083,\n",
246
+ " 'P(Health)': 0.011,\n",
247
+ " 'P(Pets_and_Animals)': 0.006,\n",
248
+ " 'P(Adult)': 0.093,\n",
249
+ " 'P(Computers_and_Electronics)': 1.0,\n",
250
+ " 'P(Online Communities)': 0.116,\n",
251
+ " 'P(Beauty_and_Fitness)': 0.015,\n",
252
+ " 'P(People_and_Society)': 0.0,\n",
253
+ " 'P(Business_and_Industrial)': 0.005,\n",
254
+ " 'P(Reference)': 0.037,\n",
255
+ " 'P(Shopping)': 0.158,\n",
256
+ " 'P(Travel_and_Transportation)': 0.005,\n",
257
+ " 'P(Food_and_Drink)': 0.032,\n",
258
+ " 'P(Law_and_Government)': 0.153,\n",
259
+ " 'P(Books_and_Literature)': 0.008,\n",
260
+ " 'P(Finance)': 0.041,\n",
261
+ " 'P(Games)': 0.063,\n",
262
+ " 'P(Home_and_Garden)': 0.028,\n",
263
+ " 'P(Jobs_and_Education)': 0.004,\n",
264
+ " 'P(Arts_and_Entertainment)': 0.011,\n",
265
+ " 'P(Sensitive Subjects)': 0.004,\n",
266
+ " 'P(Real Estate)': 0.014,\n",
267
+ " 'P(Internet_and_Telecom)': 0.019,\n",
268
+ " 'P(Sports)': 0.023}"
269
+ ]
270
+ },
271
+ "execution_count": 51,
272
+ "metadata": {},
273
+ "output_type": "execute_result"
274
  }
275
  ],
276
  "source": [
 
279
  },
280
  {
281
  "cell_type": "code",
282
+ "execution_count": 36,
283
  "metadata": {},
284
  "outputs": [
285
  {
286
  "name": "stdout",
287
  "output_type": "stream",
288
  "text": [
289
+ "Predicted Class: Food_and_Drink \n",
290
+ "probabilities_scores: 1.0\n",
291
+ "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
292
  ]
293
+ },
294
+ {
295
+ "data": {
296
+ "text/plain": [
297
+ "{'P(Hobbies_and_Leisure)': 0.032,\n",
298
+ " 'P(News)': 0.167,\n",
299
+ " 'P(Science)': 0.019,\n",
300
+ " 'P(Autos_and_Vehicles)': 0.028,\n",
301
+ " 'P(Health)': 0.134,\n",
302
+ " 'P(Pets_and_Animals)': 0.004,\n",
303
+ " 'P(Adult)': 0.018,\n",
304
+ " 'P(Computers_and_Electronics)': 0.223,\n",
305
+ " 'P(Online Communities)': 0.169,\n",
306
+ " 'P(Beauty_and_Fitness)': 0.081,\n",
307
+ " 'P(People_and_Society)': 0.005,\n",
308
+ " 'P(Business_and_Industrial)': 0.011,\n",
309
+ " 'P(Reference)': 0.022,\n",
310
+ " 'P(Shopping)': 0.054,\n",
311
+ " 'P(Travel_and_Transportation)': 0.024,\n",
312
+ " 'P(Food_and_Drink)': 1.0,\n",
313
+ " 'P(Law_and_Government)': 0.016,\n",
314
+ " 'P(Books_and_Literature)': 0.066,\n",
315
+ " 'P(Finance)': 0.01,\n",
316
+ " 'P(Games)': 0.063,\n",
317
+ " 'P(Home_and_Garden)': 0.044,\n",
318
+ " 'P(Jobs_and_Education)': 0.033,\n",
319
+ " 'P(Arts_and_Entertainment)': 0.286,\n",
320
+ " 'P(Sensitive Subjects)': 0.032,\n",
321
+ " 'P(Real Estate)': 0.003,\n",
322
+ " 'P(Internet_and_Telecom)': 0.009,\n",
323
+ " 'P(Sports)': 0.016}"
324
+ ]
325
+ },
326
+ "execution_count": 36,
327
+ "metadata": {},
328
+ "output_type": "execute_result"
329
  }
330
  ],
331
  "source": [
 
334
  },
335
  {
336
  "cell_type": "code",
337
+ "execution_count": 37,
338
  "metadata": {},
339
  "outputs": [
340
  {
341
  "name": "stdout",
342
  "output_type": "stream",
343
  "text": [
344
+ "Predicted Class: Food_and_Drink \n",
345
+ "probabilities_scores: 1.0\n",
346
+ "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
347
  ]
348
+ },
349
+ {
350
+ "data": {
351
+ "text/plain": [
352
+ "{'P(Hobbies_and_Leisure)': 0.048,\n",
353
+ " 'P(News)': 0.202,\n",
354
+ " 'P(Science)': 0.025,\n",
355
+ " 'P(Autos_and_Vehicles)': 0.095,\n",
356
+ " 'P(Health)': 0.094,\n",
357
+ " 'P(Pets_and_Animals)': 0.006,\n",
358
+ " 'P(Adult)': 0.016,\n",
359
+ " 'P(Computers_and_Electronics)': 0.129,\n",
360
+ " 'P(Online Communities)': 0.078,\n",
361
+ " 'P(Beauty_and_Fitness)': 0.122,\n",
362
+ " 'P(People_and_Society)': 0.008,\n",
363
+ " 'P(Business_and_Industrial)': 0.022,\n",
364
+ " 'P(Reference)': 0.014,\n",
365
+ " 'P(Shopping)': 0.046,\n",
366
+ " 'P(Travel_and_Transportation)': 0.024,\n",
367
+ " 'P(Food_and_Drink)': 1.0,\n",
368
+ " 'P(Law_and_Government)': 0.013,\n",
369
+ " 'P(Books_and_Literature)': 0.038,\n",
370
+ " 'P(Finance)': 0.026,\n",
371
+ " 'P(Games)': 0.091,\n",
372
+ " 'P(Home_and_Garden)': 0.025,\n",
373
+ " 'P(Jobs_and_Education)': 0.033,\n",
374
+ " 'P(Arts_and_Entertainment)': 0.233,\n",
375
+ " 'P(Sensitive Subjects)': 0.022,\n",
376
+ " 'P(Real Estate)': 0.005,\n",
377
+ " 'P(Internet_and_Telecom)': 0.003,\n",
378
+ " 'P(Sports)': 0.039}"
379
+ ]
380
+ },
381
+ "execution_count": 37,
382
+ "metadata": {},
383
+ "output_type": "execute_result"
384
  }
385
  ],
386
  "source": [
 
389
  },
390
  {
391
  "cell_type": "code",
392
+ "execution_count": 38,
393
  "metadata": {},
394
  "outputs": [
395
  {
396
  "name": "stdout",
397
  "output_type": "stream",
398
  "text": [
399
+ "Predicted Class: Food_and_Drink \n",
400
+ "probabilities_scores: 0.9980000257492065\n",
401
+ "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
402
  ]
403
+ },
404
+ {
405
+ "data": {
406
+ "text/plain": [
407
+ "{'P(Hobbies_and_Leisure)': 0.113,\n",
408
+ " 'P(News)': 0.037,\n",
409
+ " 'P(Science)': 0.024,\n",
410
+ " 'P(Autos_and_Vehicles)': 0.05,\n",
411
+ " 'P(Health)': 0.039,\n",
412
+ " 'P(Pets_and_Animals)': 0.444,\n",
413
+ " 'P(Adult)': 0.003,\n",
414
+ " 'P(Computers_and_Electronics)': 0.022,\n",
415
+ " 'P(Online Communities)': 0.12,\n",
416
+ " 'P(Beauty_and_Fitness)': 0.114,\n",
417
+ " 'P(People_and_Society)': 0.001,\n",
418
+ " 'P(Business_and_Industrial)': 0.008,\n",
419
+ " 'P(Reference)': 0.003,\n",
420
+ " 'P(Shopping)': 0.014,\n",
421
+ " 'P(Travel_and_Transportation)': 0.009,\n",
422
+ " 'P(Food_and_Drink)': 0.998,\n",
423
+ " 'P(Law_and_Government)': 0.005,\n",
424
+ " 'P(Books_and_Literature)': 0.006,\n",
425
+ " 'P(Finance)': 0.009,\n",
426
+ " 'P(Games)': 0.052,\n",
427
+ " 'P(Home_and_Garden)': 0.006,\n",
428
+ " 'P(Jobs_and_Education)': 0.005,\n",
429
+ " 'P(Arts_and_Entertainment)': 0.199,\n",
430
+ " 'P(Sensitive Subjects)': 0.033,\n",
431
+ " 'P(Real Estate)': 0.003,\n",
432
+ " 'P(Internet_and_Telecom)': 0.001,\n",
433
+ " 'P(Sports)': 0.123}"
434
+ ]
435
+ },
436
+ "execution_count": 38,
437
+ "metadata": {},
438
+ "output_type": "execute_result"
439
  }
440
  ],
441
  "source": [
 
444
  },
445
  {
446
  "cell_type": "code",
447
+ "execution_count": 39,
448
  "metadata": {},
449
  "outputs": [
450
  {
451
  "name": "stdout",
452
  "output_type": "stream",
453
  "text": [
454
+ "Predicted Class: Computers_and_Electronics \n",
455
+ "probabilities_scores: 1.0\n",
456
+ "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
457
  ]
458
+ },
459
+ {
460
+ "data": {
461
+ "text/plain": [
462
+ "{'P(Hobbies_and_Leisure)': 0.134,\n",
463
+ " 'P(News)': 0.002,\n",
464
+ " 'P(Science)': 0.027,\n",
465
+ " 'P(Autos_and_Vehicles)': 0.061,\n",
466
+ " 'P(Health)': 0.008,\n",
467
+ " 'P(Pets_and_Animals)': 0.006,\n",
468
+ " 'P(Adult)': 0.069,\n",
469
+ " 'P(Computers_and_Electronics)': 1.0,\n",
470
+ " 'P(Online Communities)': 0.16,\n",
471
+ " 'P(Beauty_and_Fitness)': 0.015,\n",
472
+ " 'P(People_and_Society)': 0.0,\n",
473
+ " 'P(Business_and_Industrial)': 0.003,\n",
474
+ " 'P(Reference)': 0.019,\n",
475
+ " 'P(Shopping)': 0.147,\n",
476
+ " 'P(Travel_and_Transportation)': 0.005,\n",
477
+ " 'P(Food_and_Drink)': 0.023,\n",
478
+ " 'P(Law_and_Government)': 0.115,\n",
479
+ " 'P(Books_and_Literature)': 0.007,\n",
480
+ " 'P(Finance)': 0.037,\n",
481
+ " 'P(Games)': 0.042,\n",
482
+ " 'P(Home_and_Garden)': 0.032,\n",
483
+ " 'P(Jobs_and_Education)': 0.003,\n",
484
+ " 'P(Arts_and_Entertainment)': 0.01,\n",
485
+ " 'P(Sensitive Subjects)': 0.003,\n",
486
+ " 'P(Real Estate)': 0.012,\n",
487
+ " 'P(Internet_and_Telecom)': 0.016,\n",
488
+ " 'P(Sports)': 0.015}"
489
+ ]
490
+ },
491
+ "execution_count": 39,
492
+ "metadata": {},
493
+ "output_type": "execute_result"
494
  }
495
  ],
496
  "source": [
 
499
  },
500
  {
501
  "cell_type": "code",
502
+ "execution_count": 40,
503
  "metadata": {},
504
  "outputs": [
505
  {
506
  "name": "stdout",
507
  "output_type": "stream",
508
  "text": [
509
+ "Predicted Class: Food_and_Drink \n",
510
+ "probabilities_scores: 0.9909999966621399\n",
511
+ "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
512
  ]
513
+ },
514
+ {
515
+ "data": {
516
+ "text/plain": [
517
+ "{'P(Hobbies_and_Leisure)': 0.02,\n",
518
+ " 'P(News)': 0.017,\n",
519
+ " 'P(Science)': 0.008,\n",
520
+ " 'P(Autos_and_Vehicles)': 0.06,\n",
521
+ " 'P(Health)': 0.032,\n",
522
+ " 'P(Pets_and_Animals)': 0.004,\n",
523
+ " 'P(Adult)': 0.022,\n",
524
+ " 'P(Computers_and_Electronics)': 0.989,\n",
525
+ " 'P(Online Communities)': 0.056,\n",
526
+ " 'P(Beauty_and_Fitness)': 0.026,\n",
527
+ " 'P(People_and_Society)': 0.0,\n",
528
+ " 'P(Business_and_Industrial)': 0.008,\n",
529
+ " 'P(Reference)': 0.052,\n",
530
+ " 'P(Shopping)': 0.105,\n",
531
+ " 'P(Travel_and_Transportation)': 0.012,\n",
532
+ " 'P(Food_and_Drink)': 0.991,\n",
533
+ " 'P(Law_and_Government)': 0.007,\n",
534
+ " 'P(Books_and_Literature)': 0.009,\n",
535
+ " 'P(Finance)': 0.014,\n",
536
+ " 'P(Games)': 0.284,\n",
537
+ " 'P(Home_and_Garden)': 0.015,\n",
538
+ " 'P(Jobs_and_Education)': 0.017,\n",
539
+ " 'P(Arts_and_Entertainment)': 0.031,\n",
540
+ " 'P(Sensitive Subjects)': 0.014,\n",
541
+ " 'P(Real Estate)': 0.003,\n",
542
+ " 'P(Internet_and_Telecom)': 0.003,\n",
543
+ " 'P(Sports)': 0.021}"
544
+ ]
545
+ },
546
+ "execution_count": 40,
547
+ "metadata": {},
548
+ "output_type": "execute_result"
549
  }
550
  ],
551
  "source": [
 
554
  },
555
  {
556
  "cell_type": "code",
557
+ "execution_count": 41,
558
  "metadata": {},
559
  "outputs": [
560
  {
561
  "name": "stdout",
562
  "output_type": "stream",
563
  "text": [
564
+ "Predicted Class: Computers_and_Electronics \n",
565
+ "probabilities_scores: 1.0\n",
566
+ "\n"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
567
  ]
568
+ },
569
+ {
570
+ "data": {
571
+ "text/plain": [
572
+ "{'P(Hobbies_and_Leisure)': 0.054,\n",
573
+ " 'P(News)': 0.003,\n",
574
+ " 'P(Science)': 0.011,\n",
575
+ " 'P(Autos_and_Vehicles)': 0.122,\n",
576
+ " 'P(Health)': 0.01,\n",
577
+ " 'P(Pets_and_Animals)': 0.004,\n",
578
+ " 'P(Adult)': 0.054,\n",
579
+ " 'P(Computers_and_Electronics)': 1.0,\n",
580
+ " 'P(Online Communities)': 0.081,\n",
581
+ " 'P(Beauty_and_Fitness)': 0.016,\n",
582
+ " 'P(People_and_Society)': 0.0,\n",
583
+ " 'P(Business_and_Industrial)': 0.005,\n",
584
+ " 'P(Reference)': 0.064,\n",
585
+ " 'P(Shopping)': 0.224,\n",
586
+ " 'P(Travel_and_Transportation)': 0.006,\n",
587
+ " 'P(Food_and_Drink)': 0.172,\n",
588
+ " 'P(Law_and_Government)': 0.051,\n",
589
+ " 'P(Books_and_Literature)': 0.006,\n",
590
+ " 'P(Finance)': 0.025,\n",
591
+ " 'P(Games)': 0.138,\n",
592
+ " 'P(Home_and_Garden)': 0.03,\n",
593
+ " 'P(Jobs_and_Education)': 0.006,\n",
594
+ " 'P(Arts_and_Entertainment)': 0.008,\n",
595
+ " 'P(Sensitive Subjects)': 0.003,\n",
596
+ " 'P(Real Estate)': 0.006,\n",
597
+ " 'P(Internet_and_Telecom)': 0.004,\n",
598
+ " 'P(Sports)': 0.018}"
599
+ ]
600
+ },
601
+ "execution_count": 41,
602
+ "metadata": {},
603
+ "output_type": "execute_result"
604
  }
605
  ],
606
  "source": [
 
609
  },
610
  {
611
  "cell_type": "code",
612
+ "execution_count": 42,
613
  "metadata": {},
614
+ "outputs": [
615
+ {
616
+ "name": "stdout",
617
+ "output_type": "stream",
618
+ "text": [
619
+ "Predicted Class: Computers_and_Electronics \n",
620
+ "probabilities_scores: 1.0\n",
621
+ "\n"
622
+ ]
623
+ },
624
+ {
625
+ "data": {
626
+ "text/plain": [
627
+ "{'P(Hobbies_and_Leisure)': 0.077,\n",
628
+ " 'P(News)': 0.005,\n",
629
+ " 'P(Science)': 0.009,\n",
630
+ " 'P(Autos_and_Vehicles)': 0.077,\n",
631
+ " 'P(Health)': 0.015,\n",
632
+ " 'P(Pets_and_Animals)': 0.003,\n",
633
+ " 'P(Adult)': 0.073,\n",
634
+ " 'P(Computers_and_Electronics)': 1.0,\n",
635
+ " 'P(Online Communities)': 0.086,\n",
636
+ " 'P(Beauty_and_Fitness)': 0.022,\n",
637
+ " 'P(People_and_Society)': 0.0,\n",
638
+ " 'P(Business_and_Industrial)': 0.004,\n",
639
+ " 'P(Reference)': 0.021,\n",
640
+ " 'P(Shopping)': 0.203,\n",
641
+ " 'P(Travel_and_Transportation)': 0.003,\n",
642
+ " 'P(Food_and_Drink)': 0.241,\n",
643
+ " 'P(Law_and_Government)': 0.009,\n",
644
+ " 'P(Books_and_Literature)': 0.003,\n",
645
+ " 'P(Finance)': 0.029,\n",
646
+ " 'P(Games)': 0.195,\n",
647
+ " 'P(Home_and_Garden)': 0.044,\n",
648
+ " 'P(Jobs_and_Education)': 0.004,\n",
649
+ " 'P(Arts_and_Entertainment)': 0.013,\n",
650
+ " 'P(Sensitive Subjects)': 0.003,\n",
651
+ " 'P(Real Estate)': 0.012,\n",
652
+ " 'P(Internet_and_Telecom)': 0.004,\n",
653
+ " 'P(Sports)': 0.017}"
654
+ ]
655
+ },
656
+ "execution_count": 42,
657
+ "metadata": {},
658
+ "output_type": "execute_result"
659
+ }
660
+ ],
661
  "source": [
662
  "predict(\n",
663
  " 'razer kraken'\n",
664
  ")"
665
  ]
666
  },
667
+ {
668
+ "cell_type": "code",
669
+ "execution_count": 43,
670
+ "metadata": {},
671
+ "outputs": [
672
+ {
673
+ "name": "stdout",
674
+ "output_type": "stream",
675
+ "text": [
676
+ "Predicted Class: Online Communities \n",
677
+ "probabilities_scores: 0.9990000128746033\n",
678
+ "\n"
679
+ ]
680
+ },
681
+ {
682
+ "data": {
683
+ "text/plain": [
684
+ "{'P(Hobbies_and_Leisure)': 0.009,\n",
685
+ " 'P(News)': 0.037,\n",
686
+ " 'P(Science)': 0.014,\n",
687
+ " 'P(Autos_and_Vehicles)': 0.004,\n",
688
+ " 'P(Health)': 0.007,\n",
689
+ " 'P(Pets_and_Animals)': 0.048,\n",
690
+ " 'P(Adult)': 0.287,\n",
691
+ " 'P(Computers_and_Electronics)': 0.536,\n",
692
+ " 'P(Online Communities)': 0.999,\n",
693
+ " 'P(Beauty_and_Fitness)': 0.002,\n",
694
+ " 'P(People_and_Society)': 0.001,\n",
695
+ " 'P(Business_and_Industrial)': 0.002,\n",
696
+ " 'P(Reference)': 0.006,\n",
697
+ " 'P(Shopping)': 0.038,\n",
698
+ " 'P(Travel_and_Transportation)': 0.016,\n",
699
+ " 'P(Food_and_Drink)': 0.012,\n",
700
+ " 'P(Law_and_Government)': 0.024,\n",
701
+ " 'P(Books_and_Literature)': 0.059,\n",
702
+ " 'P(Finance)': 0.001,\n",
703
+ " 'P(Games)': 0.025,\n",
704
+ " 'P(Home_and_Garden)': 0.377,\n",
705
+ " 'P(Jobs_and_Education)': 0.018,\n",
706
+ " 'P(Arts_and_Entertainment)': 0.028,\n",
707
+ " 'P(Sensitive Subjects)': 0.072,\n",
708
+ " 'P(Real Estate)': 0.002,\n",
709
+ " 'P(Internet_and_Telecom)': 0.003,\n",
710
+ " 'P(Sports)': 0.006}"
711
+ ]
712
+ },
713
+ "execution_count": 43,
714
+ "metadata": {},
715
+ "output_type": "execute_result"
716
+ }
717
+ ],
718
+ "source": [
719
+ "predict(\"facebook\")"
720
+ ]
721
+ },
722
+ {
723
+ "cell_type": "code",
724
+ "execution_count": 44,
725
+ "metadata": {},
726
+ "outputs": [
727
+ {
728
+ "name": "stdout",
729
+ "output_type": "stream",
730
+ "text": [
731
+ "Predicted Class: Computers_and_Electronics \n",
732
+ "probabilities_scores: 1.0\n",
733
+ "\n"
734
+ ]
735
+ },
736
+ {
737
+ "data": {
738
+ "text/plain": [
739
+ "{'P(Hobbies_and_Leisure)': 0.054,\n",
740
+ " 'P(News)': 0.003,\n",
741
+ " 'P(Science)': 0.011,\n",
742
+ " 'P(Autos_and_Vehicles)': 0.122,\n",
743
+ " 'P(Health)': 0.01,\n",
744
+ " 'P(Pets_and_Animals)': 0.004,\n",
745
+ " 'P(Adult)': 0.054,\n",
746
+ " 'P(Computers_and_Electronics)': 1.0,\n",
747
+ " 'P(Online Communities)': 0.081,\n",
748
+ " 'P(Beauty_and_Fitness)': 0.016,\n",
749
+ " 'P(People_and_Society)': 0.0,\n",
750
+ " 'P(Business_and_Industrial)': 0.005,\n",
751
+ " 'P(Reference)': 0.064,\n",
752
+ " 'P(Shopping)': 0.224,\n",
753
+ " 'P(Travel_and_Transportation)': 0.006,\n",
754
+ " 'P(Food_and_Drink)': 0.172,\n",
755
+ " 'P(Law_and_Government)': 0.051,\n",
756
+ " 'P(Books_and_Literature)': 0.006,\n",
757
+ " 'P(Finance)': 0.025,\n",
758
+ " 'P(Games)': 0.138,\n",
759
+ " 'P(Home_and_Garden)': 0.03,\n",
760
+ " 'P(Jobs_and_Education)': 0.006,\n",
761
+ " 'P(Arts_and_Entertainment)': 0.008,\n",
762
+ " 'P(Sensitive Subjects)': 0.003,\n",
763
+ " 'P(Real Estate)': 0.006,\n",
764
+ " 'P(Internet_and_Telecom)': 0.004,\n",
765
+ " 'P(Sports)': 0.018}"
766
+ ]
767
+ },
768
+ "execution_count": 44,
769
+ "metadata": {},
770
+ "output_type": "execute_result"
771
+ }
772
+ ],
773
+ "source": [
774
+ "predict('apple iphone')"
775
+ ]
776
+ },
777
+ {
778
+ "cell_type": "code",
779
+ "execution_count": 45,
780
+ "metadata": {},
781
+ "outputs": [
782
+ {
783
+ "name": "stdout",
784
+ "output_type": "stream",
785
+ "text": [
786
+ "Predicted Class: Computers_and_Electronics \n",
787
+ "probabilities_scores: 1.0\n",
788
+ "\n"
789
+ ]
790
+ },
791
+ {
792
+ "data": {
793
+ "text/plain": [
794
+ "{'P(Hobbies_and_Leisure)': 0.186,\n",
795
+ " 'P(News)': 0.003,\n",
796
+ " 'P(Science)': 0.009,\n",
797
+ " 'P(Autos_and_Vehicles)': 0.512,\n",
798
+ " 'P(Health)': 0.002,\n",
799
+ " 'P(Pets_and_Animals)': 0.002,\n",
800
+ " 'P(Adult)': 0.039,\n",
801
+ " 'P(Computers_and_Electronics)': 1.0,\n",
802
+ " 'P(Online Communities)': 0.061,\n",
803
+ " 'P(Beauty_and_Fitness)': 0.003,\n",
804
+ " 'P(People_and_Society)': 0.0,\n",
805
+ " 'P(Business_and_Industrial)': 0.001,\n",
806
+ " 'P(Reference)': 0.015,\n",
807
+ " 'P(Shopping)': 0.274,\n",
808
+ " 'P(Travel_and_Transportation)': 0.002,\n",
809
+ " 'P(Food_and_Drink)': 0.009,\n",
810
+ " 'P(Law_and_Government)': 0.058,\n",
811
+ " 'P(Books_and_Literature)': 0.002,\n",
812
+ " 'P(Finance)': 0.033,\n",
813
+ " 'P(Games)': 0.151,\n",
814
+ " 'P(Home_and_Garden)': 0.027,\n",
815
+ " 'P(Jobs_and_Education)': 0.002,\n",
816
+ " 'P(Arts_and_Entertainment)': 0.005,\n",
817
+ " 'P(Sensitive Subjects)': 0.001,\n",
818
+ " 'P(Real Estate)': 0.035,\n",
819
+ " 'P(Internet_and_Telecom)': 0.001,\n",
820
+ " 'P(Sports)': 0.008}"
821
+ ]
822
+ },
823
+ "execution_count": 45,
824
+ "metadata": {},
825
+ "output_type": "execute_result"
826
+ }
827
+ ],
828
+ "source": [
829
+ "predict('best vr')"
830
+ ]
831
+ },
832
+ {
833
+ "cell_type": "code",
834
+ "execution_count": 46,
835
+ "metadata": {},
836
+ "outputs": [
837
+ {
838
+ "name": "stdout",
839
+ "output_type": "stream",
840
+ "text": [
841
+ "Predicted Class: Computers_and_Electronics \n",
842
+ "probabilities_scores: 1.0\n",
843
+ "\n"
844
+ ]
845
+ },
846
+ {
847
+ "data": {
848
+ "text/plain": [
849
+ "{'P(Hobbies_and_Leisure)': 0.186,\n",
850
+ " 'P(News)': 0.003,\n",
851
+ " 'P(Science)': 0.009,\n",
852
+ " 'P(Autos_and_Vehicles)': 0.512,\n",
853
+ " 'P(Health)': 0.002,\n",
854
+ " 'P(Pets_and_Animals)': 0.002,\n",
855
+ " 'P(Adult)': 0.039,\n",
856
+ " 'P(Computers_and_Electronics)': 1.0,\n",
857
+ " 'P(Online Communities)': 0.061,\n",
858
+ " 'P(Beauty_and_Fitness)': 0.003,\n",
859
+ " 'P(People_and_Society)': 0.0,\n",
860
+ " 'P(Business_and_Industrial)': 0.001,\n",
861
+ " 'P(Reference)': 0.015,\n",
862
+ " 'P(Shopping)': 0.274,\n",
863
+ " 'P(Travel_and_Transportation)': 0.002,\n",
864
+ " 'P(Food_and_Drink)': 0.009,\n",
865
+ " 'P(Law_and_Government)': 0.058,\n",
866
+ " 'P(Books_and_Literature)': 0.002,\n",
867
+ " 'P(Finance)': 0.033,\n",
868
+ " 'P(Games)': 0.151,\n",
869
+ " 'P(Home_and_Garden)': 0.027,\n",
870
+ " 'P(Jobs_and_Education)': 0.002,\n",
871
+ " 'P(Arts_and_Entertainment)': 0.005,\n",
872
+ " 'P(Sensitive Subjects)': 0.001,\n",
873
+ " 'P(Real Estate)': 0.035,\n",
874
+ " 'P(Internet_and_Telecom)': 0.001,\n",
875
+ " 'P(Sports)': 0.008}"
876
+ ]
877
+ },
878
+ "execution_count": 46,
879
+ "metadata": {},
880
+ "output_type": "execute_result"
881
+ }
882
+ ],
883
+ "source": [
884
+ "predict(\"best vr\")"
885
+ ]
886
+ },
887
+ {
888
+ "cell_type": "code",
889
+ "execution_count": 47,
890
+ "metadata": {},
891
+ "outputs": [
892
+ {
893
+ "name": "stdout",
894
+ "output_type": "stream",
895
+ "text": [
896
+ "Predicted Class: Adult \n",
897
+ "probabilities_scores: 0.7149999737739563\n",
898
+ "\n"
899
+ ]
900
+ },
901
+ {
902
+ "data": {
903
+ "text/plain": [
904
+ "{'P(Hobbies_and_Leisure)': 0.684,\n",
905
+ " 'P(News)': 0.009,\n",
906
+ " 'P(Science)': 0.001,\n",
907
+ " 'P(Autos_and_Vehicles)': 0.004,\n",
908
+ " 'P(Health)': 0.001,\n",
909
+ " 'P(Pets_and_Animals)': 0.0,\n",
910
+ " 'P(Adult)': 0.715,\n",
911
+ " 'P(Computers_and_Electronics)': 0.274,\n",
912
+ " 'P(Online Communities)': 0.246,\n",
913
+ " 'P(Beauty_and_Fitness)': 0.003,\n",
914
+ " 'P(People_and_Society)': 0.001,\n",
915
+ " 'P(Business_and_Industrial)': 0.0,\n",
916
+ " 'P(Reference)': 0.0,\n",
917
+ " 'P(Shopping)': 0.022,\n",
918
+ " 'P(Travel_and_Transportation)': 0.001,\n",
919
+ " 'P(Food_and_Drink)': 0.002,\n",
920
+ " 'P(Law_and_Government)': 0.021,\n",
921
+ " 'P(Books_and_Literature)': 0.007,\n",
922
+ " 'P(Finance)': 0.003,\n",
923
+ " 'P(Games)': 0.012,\n",
924
+ " 'P(Home_and_Garden)': 0.178,\n",
925
+ " 'P(Jobs_and_Education)': 0.002,\n",
926
+ " 'P(Arts_and_Entertainment)': 0.01,\n",
927
+ " 'P(Sensitive Subjects)': 0.001,\n",
928
+ " 'P(Real Estate)': 0.026,\n",
929
+ " 'P(Internet_and_Telecom)': 0.0,\n",
930
+ " 'P(Sports)': 0.02}"
931
+ ]
932
+ },
933
+ "execution_count": 47,
934
+ "metadata": {},
935
+ "output_type": "execute_result"
936
+ }
937
+ ],
938
+ "source": [
939
+ "predict(\"pa best views\")"
940
+ ]
941
+ },
942
  {
943
  "cell_type": "code",
944
  "execution_count": null,
945
  "metadata": {},
946
  "outputs": [],
947
+ "source": []
948
+ },
949
+ {
950
+ "cell_type": "code",
951
+ "execution_count": null,
952
+ "metadata": {},
953
+ "outputs": [],
954
+ "source": []
955
+ },
956
+ {
957
+ "cell_type": "code",
958
+ "execution_count": 10,
959
+ "metadata": {},
960
+ "outputs": [],
961
  "source": [
962
+ "inputs = tokenizer(\"best cat ear headphones\", return_tensors=\"pt\")\n",
963
+ "with torch.no_grad():\n",
964
+ " logits = model(**inputs).logits"
965
+ ]
966
+ },
967
+ {
968
+ "cell_type": "code",
969
+ "execution_count": 14,
970
+ "metadata": {},
971
+ "outputs": [
972
+ {
973
+ "data": {
974
+ "text/plain": [
975
+ "array([-1.353771 , -5.8301578, -4.050355 , -1.9018538, -5.129807 ,\n",
976
+ " -5.2707334, -2.696651 , 8.821061 , -2.0982835, -4.4173856,\n",
977
+ " -9.076361 , -5.888918 , -3.7155762, -1.0305756, -5.5817475,\n",
978
+ " -3.987473 , -2.4096951, -5.1136127, -3.217719 , -2.938894 ,\n",
979
+ " -3.7113686, -5.8976064, -4.788314 , -6.4181705, -3.5685277,\n",
980
+ " -4.5266075, -4.3206973], dtype=float32)"
981
+ ]
982
+ },
983
+ "execution_count": 14,
984
+ "metadata": {},
985
+ "output_type": "execute_result"
986
+ }
987
+ ],
988
+ "source": [
989
+ "l= logits.numpy()[0]\n",
990
+ "l"
991
+ ]
992
+ },
993
+ {
994
+ "cell_type": "code",
995
+ "execution_count": 18,
996
+ "metadata": {},
997
+ "outputs": [],
998
+ "source": [
999
+ "# logit2prob <- function(logit){\n",
1000
+ "# odds <- exp(logit)\n",
1001
+ "# prob <- odds / (1 + odds)\n",
1002
+ "# return(prob)\n",
1003
+ "# }\n",
1004
+ "def logit2prob(logit):\n",
1005
+ " odds =np.exp(logit)\n",
1006
+ " prob = odds / (1 + odds)\n",
1007
+ " return np.round(prob, 2)"
1008
+ ]
1009
+ },
1010
+ {
1011
+ "cell_type": "code",
1012
+ "execution_count": 17,
1013
+ "metadata": {},
1014
+ "outputs": [
1015
+ {
1016
+ "name": "stdout",
1017
+ "output_type": "stream",
1018
+ "text": [
1019
+ "0.21\n",
1020
+ "0.0\n",
1021
+ "0.02\n",
1022
+ "0.13\n",
1023
+ "0.01\n",
1024
+ "0.01\n",
1025
+ "0.06\n",
1026
+ "1.0\n",
1027
+ "0.11\n",
1028
+ "0.01\n",
1029
+ "0.0\n",
1030
+ "0.0\n",
1031
+ "0.02\n",
1032
+ "0.26\n",
1033
+ "0.0\n",
1034
+ "0.02\n",
1035
+ "0.08\n",
1036
+ "0.01\n",
1037
+ "0.04\n",
1038
+ "0.05\n",
1039
+ "0.02\n",
1040
+ "0.0\n",
1041
+ "0.01\n",
1042
+ "0.0\n",
1043
+ "0.03\n",
1044
+ "0.01\n",
1045
+ "0.01\n"
1046
+ ]
1047
+ }
1048
+ ],
1049
+ "source": [
1050
+ "for i in l:\n",
1051
+ " print(round(logit2prob(i), 2))"
1052
+ ]
1053
+ },
1054
+ {
1055
+ "cell_type": "code",
1056
+ "execution_count": 19,
1057
+ "metadata": {},
1058
+ "outputs": [
1059
+ {
1060
+ "data": {
1061
+ "text/plain": [
1062
+ "array([0.21, 0. , 0.02, 0.13, 0.01, 0.01, 0.06, 1. , 0.11, 0.01, 0. ,\n",
1063
+ " 0. , 0.02, 0.26, 0. , 0.02, 0.08, 0.01, 0.04, 0.05, 0.02, 0. ,\n",
1064
+ " 0.01, 0. , 0.03, 0.01, 0.01], dtype=float32)"
1065
+ ]
1066
+ },
1067
+ "execution_count": 19,
1068
+ "metadata": {},
1069
+ "output_type": "execute_result"
1070
+ }
1071
+ ],
1072
+ "source": [
1073
+ "logit2prob(l)"
1074
  ]
1075
  },
1076
  {