File size: 22,167 Bytes
01b66c2
1
{"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"dockerImageVersionId":30733,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"## Load dataset","metadata":{}},{"cell_type":"code","source":"from datasets import load_dataset","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:39:56.584242Z","iopub.execute_input":"2024-07-06T11:39:56.584593Z","iopub.status.idle":"2024-07-06T11:39:58.156952Z","shell.execute_reply.started":"2024-07-06T11:39:56.584562Z","shell.execute_reply":"2024-07-06T11:39:58.155992Z"},"trusted":true},"execution_count":1,"outputs":[]},{"cell_type":"code","source":"shoe_dataset = load_dataset(\"mazed/amazon_shoe_review\")","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:39:58.158713Z","iopub.execute_input":"2024-07-06T11:39:58.159533Z","iopub.status.idle":"2024-07-06T11:40:01.257551Z","shell.execute_reply.started":"2024-07-06T11:39:58.159500Z","shell.execute_reply":"2024-07-06T11:40:01.256701Z"},"trusted":true},"execution_count":2,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading readme:   0%|          | 0.00/456 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"412edfda83f74b319c351e3e38612e97"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading data:   0%|          | 0.00/10.1M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"62223b07878d48c9921a4bba61638cff"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading 
data:   0%|          | 0.00/1.11M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"c7feddfa7cdd47d08233c3aafd5e35ac"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating train split:   0%|          | 0/90000 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"dbc9650a4e904800af19b0af8c2cf71b"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating test split:   0%|          | 0/10000 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5b187d257eb949faa5c9bd2ffa889fd6"}},"metadata":{}}]},{"cell_type":"code","source":"shoe_dataset","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:01.258601Z","iopub.execute_input":"2024-07-06T11:40:01.258922Z","iopub.status.idle":"2024-07-06T11:40:01.266386Z","shell.execute_reply.started":"2024-07-06T11:40:01.258896Z","shell.execute_reply":"2024-07-06T11:40:01.265351Z"},"trusted":true},"execution_count":3,"outputs":[{"execution_count":3,"output_type":"execute_result","data":{"text/plain":"DatasetDict({\n    train: Dataset({\n        features: ['labels', 'text'],\n        num_rows: 90000\n    })\n    test: Dataset({\n        features: ['labels', 'text'],\n        num_rows: 10000\n    })\n})"},"metadata":{}}]},{"cell_type":"code","source":"shoe_dataset[\"train\"][0]","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:01.268996Z","iopub.execute_input":"2024-07-06T11:40:01.269313Z","iopub.status.idle":"2024-07-06T11:40:01.307838Z","shell.execute_reply.started":"2024-07-06T11:40:01.269277Z","shell.execute_reply":"2024-07-06T11:40:01.306874Z"},"trusted":true},"execution_count":4,"outputs":[{"execution_count":4,"output_type":"execute_result","data":{"text/plain":"{'labels': 1,\n 'text': \"Material looks cheaper than what I expected. 
Doesn't seem like real quality leather.\"}"},"metadata":{}}]},{"cell_type":"markdown","source":"## Train","metadata":{}},{"cell_type":"code","source":"import transformers","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:01.309006Z","iopub.execute_input":"2024-07-06T11:40:01.309354Z","iopub.status.idle":"2024-07-06T11:40:04.611923Z","shell.execute_reply.started":"2024-07-06T11:40:01.309323Z","shell.execute_reply":"2024-07-06T11:40:04.611040Z"},"trusted":true},"execution_count":5,"outputs":[]},{"cell_type":"code","source":"from transformers import (\nAutoModelForSequenceClassification,\nAutoTokenizer,\nTrainer,\nTrainingArguments,\n)","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:04.613114Z","iopub.execute_input":"2024-07-06T11:40:04.613764Z","iopub.status.idle":"2024-07-06T11:40:16.018456Z","shell.execute_reply.started":"2024-07-06T11:40:04.613728Z","shell.execute_reply":"2024-07-06T11:40:16.017695Z"},"trusted":true},"execution_count":6,"outputs":[{"name":"stderr","text":"2024-07-06 11:40:07.714038: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n2024-07-06 11:40:07.714148: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n2024-07-06 11:40:07.799139: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","output_type":"stream"}]},{"cell_type":"code","source":"train_dataset = shoe_dataset['train']\nvalid_dataset = 
shoe_dataset['test']","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.019449Z","iopub.execute_input":"2024-07-06T11:40:16.019980Z","iopub.status.idle":"2024-07-06T11:40:16.024606Z","shell.execute_reply.started":"2024-07-06T11:40:16.019955Z","shell.execute_reply":"2024-07-06T11:40:16.023614Z"},"trusted":true},"execution_count":7,"outputs":[]},{"cell_type":"code","source":"train_dataset","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.025896Z","iopub.execute_input":"2024-07-06T11:40:16.026235Z","iopub.status.idle":"2024-07-06T11:40:16.046752Z","shell.execute_reply.started":"2024-07-06T11:40:16.026205Z","shell.execute_reply":"2024-07-06T11:40:16.045870Z"},"trusted":true},"execution_count":8,"outputs":[{"execution_count":8,"output_type":"execute_result","data":{"text/plain":"Dataset({\n    features: ['labels', 'text'],\n    num_rows: 90000\n})"},"metadata":{}}]},{"cell_type":"code","source":"valid_dataset","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.047662Z","iopub.execute_input":"2024-07-06T11:40:16.047963Z","iopub.status.idle":"2024-07-06T11:40:16.055090Z","shell.execute_reply.started":"2024-07-06T11:40:16.047932Z","shell.execute_reply":"2024-07-06T11:40:16.054115Z"},"trusted":true},"execution_count":9,"outputs":[{"execution_count":9,"output_type":"execute_result","data":{"text/plain":"Dataset({\n    features: ['labels', 'text'],\n    num_rows: 10000\n})"},"metadata":{}}]},{"cell_type":"markdown","source":"Define a function to compute different metrics.","metadata":{}},{"cell_type":"code","source":"from sklearn.metrics import accuracy_score, 
precision_recall_fscore_support","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.058814Z","iopub.execute_input":"2024-07-06T11:40:16.059106Z","iopub.status.idle":"2024-07-06T11:40:16.065546Z","shell.execute_reply.started":"2024-07-06T11:40:16.059084Z","shell.execute_reply":"2024-07-06T11:40:16.064790Z"},"trusted":true},"execution_count":10,"outputs":[]},{"cell_type":"code","source":"def compute_metrics(pred):\n    \"\"\"Compute weighted classification metrics for a `Trainer` evaluation pass.\n\n    Args:\n        pred: prediction object exposing `label_ids` (true classes) and\n            `predictions` (per-class scores; the argmax over the last axis\n            is taken as the predicted class).\n\n    Returns:\n        dict with `accuracy`, `f1`, `precision` and `recall`; the last three\n        are support-weighted averages over the classes.\n    \"\"\"\n    labels = pred.label_ids\n    preds = pred.predictions.argmax(-1)\n    # zero_division=0 scores a never-predicted class as 0 (the value sklearn\n    # already used) without emitting UndefinedMetricWarning on every eval.\n    precision, recall, f1, _ = precision_recall_fscore_support(\n        labels, preds, average='weighted', zero_division=0\n    )\n    acc = accuracy_score(labels, preds)\n    return {\"accuracy\": acc, \"f1\": f1, \"precision\": precision, \"recall\": recall}","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.066824Z","iopub.execute_input":"2024-07-06T11:40:16.067093Z","iopub.status.idle":"2024-07-06T11:40:16.073528Z","shell.execute_reply.started":"2024-07-06T11:40:16.067071Z","shell.execute_reply":"2024-07-06T11:40:16.072862Z"},"trusted":true},"execution_count":11,"outputs":[]},{"cell_type":"markdown","source":"Let's download the base model and its tokenizer from the Hugging Face Hub.","metadata":{}},{"cell_type":"code","source":"base_model_id = \"distilbert-base-uncased\"\nnum_labels = 5","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.074639Z","iopub.execute_input":"2024-07-06T11:40:16.075438Z","iopub.status.idle":"2024-07-06T11:40:16.081021Z","shell.execute_reply.started":"2024-07-06T11:40:16.075406Z","shell.execute_reply":"2024-07-06T11:40:16.079893Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"model = AutoModelForSequenceClassification.from_pretrained(base_model_id, num_labels=num_labels)\ntokenizer = 
AutoTokenizer.from_pretrained(base_model_id)","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.082025Z","iopub.execute_input":"2024-07-06T11:40:16.082303Z","iopub.status.idle":"2024-07-06T11:40:20.918564Z","shell.execute_reply.started":"2024-07-06T11:40:16.082281Z","shell.execute_reply":"2024-07-06T11:40:20.917793Z"},"trusted":true},"execution_count":13,"outputs":[{"output_type":"display_data","data":{"text/plain":"config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"0c6c35761e27415ab0d1b26978d516f7"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"1813d959f4414f529de98a5f1246c001"}},"metadata":{}},{"name":"stderr","text":"Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"71fe18fee3de41778a89582ab748aa13"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"10bef65cc9064fafa0f47bc9fdb0584b"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer.json:   0%|          | 0.00/466k [00:00<?, 
?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5c70807dcacc4e749cf2cb69f92df18c"}},"metadata":{}}]},{"cell_type":"code","source":"sample_text=\"This is a sample text.\"","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:20.919731Z","iopub.execute_input":"2024-07-06T11:40:20.920023Z","iopub.status.idle":"2024-07-06T11:40:20.924181Z","shell.execute_reply.started":"2024-07-06T11:40:20.919999Z","shell.execute_reply":"2024-07-06T11:40:20.923129Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"encoded_sample_text=tokenizer(sample_text)","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:20.925200Z","iopub.execute_input":"2024-07-06T11:40:20.925780Z","iopub.status.idle":"2024-07-06T11:40:20.935207Z","shell.execute_reply.started":"2024-07-06T11:40:20.925753Z","shell.execute_reply":"2024-07-06T11:40:20.934371Z"},"trusted":true},"execution_count":15,"outputs":[]},{"cell_type":"code","source":"encoded_sample_text","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:20.936375Z","iopub.execute_input":"2024-07-06T11:40:20.937028Z","iopub.status.idle":"2024-07-06T11:40:20.943328Z","shell.execute_reply.started":"2024-07-06T11:40:20.936997Z","shell.execute_reply":"2024-07-06T11:40:20.942359Z"},"trusted":true},"execution_count":16,"outputs":[{"execution_count":16,"output_type":"execute_result","data":{"text/plain":"{'input_ids': [101, 2023, 2003, 1037, 7099, 3793, 1012, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]}"},"metadata":{}}]},{"cell_type":"markdown","source":"Now, we define a function to tokenize the datasets.","metadata":{}},{"cell_type":"code","source":"def tokenize(batch):\n    \"\"\"Tokenize the `text` column of a batch of examples.\n\n    Sequences are truncated to the tokenizer's maximum length but are NOT\n    padded here: the `Trainer` is constructed with the tokenizer, so it pads\n    each batch to that batch's longest sequence at collation time. Padding\n    every review to the full maximum length up front makes every training\n    step pay for the padding tokens.\n    \"\"\"\n    return tokenizer(batch['text'], 
truncation=True)\n","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:20.944499Z","iopub.execute_input":"2024-07-06T11:40:20.945119Z","iopub.status.idle":"2024-07-06T11:40:20.949798Z","shell.execute_reply.started":"2024-07-06T11:40:20.945088Z","shell.execute_reply":"2024-07-06T11:40:20.948821Z"},"trusted":true},"execution_count":17,"outputs":[]},{"cell_type":"code","source":"# # Define the tokenize function\n# def tokenize(batch):\n#     try:\n#         # Tokenize the text and return the tokenized output\n#         tokenized = tokenizer(batch['text'], padding='max_length', truncation=True)\n#         return tokenized\n#     except Exception as e:\n#         print(f\"Error tokenizing batch: {e}\")\n#         print(batch['text'])\n#         return {}","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:20.950989Z","iopub.execute_input":"2024-07-06T11:40:20.951346Z","iopub.status.idle":"2024-07-06T11:40:20.958513Z","shell.execute_reply.started":"2024-07-06T11:40:20.951317Z","shell.execute_reply":"2024-07-06T11:40:20.957759Z"},"trusted":true},"execution_count":18,"outputs":[]},{"cell_type":"code","source":"train_dataset = train_dataset.map(tokenize, batched=True, batch_size=len(train_dataset))\nvalid_dataset = valid_dataset.map(tokenize, batched=True, batch_size=len(valid_dataset))","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:20.959481Z","iopub.execute_input":"2024-07-06T11:40:20.959791Z","iopub.status.idle":"2024-07-06T11:40:58.433932Z","shell.execute_reply.started":"2024-07-06T11:40:20.959768Z","shell.execute_reply":"2024-07-06T11:40:58.433027Z"},"trusted":true},"execution_count":19,"outputs":[{"output_type":"display_data","data":{"text/plain":"Map:   0%|          | 0/90000 [00:00<?, ? 
examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"485dc457c24d4c7db98150fe42bd77f0"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Map:   0%|          | 0/10000 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"21553dd3952c4e9eb4e8d3c3888f46d8"}},"metadata":{}}]},{"cell_type":"markdown","source":"Define the TrainingArguments for our training job: hyperparameters, where to save the model etc.","metadata":{}},{"cell_type":"code","source":"epochs = 3\nlearning_rate = 5e-5\ntrain_batch_size = 32\neval_batch_size = 32\nsave_strategy = 'epoch'","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:41:07.493826Z","iopub.execute_input":"2024-07-06T11:41:07.494563Z","iopub.status.idle":"2024-07-06T11:41:07.499061Z","shell.execute_reply.started":"2024-07-06T11:41:07.494532Z","shell.execute_reply":"2024-07-06T11:41:07.497915Z"},"trusted":true},"execution_count":20,"outputs":[]},{"cell_type":"code","source":"training_args = TrainingArguments(\n    output_dir=\"/kaggle/working/\",\n    # Label the run after the model and data actually used in this notebook\n    # (base_model_id on the Amazon shoe reviews), not BERT on SST-2.\n    run_name=f\"{base_model_id}-finetune-amazon-shoe-reviews\",\n    num_train_epochs=epochs,\n    per_device_train_batch_size=train_batch_size,\n    per_device_eval_batch_size=eval_batch_size,\n    save_strategy=save_strategy,\n    eval_strategy='epoch',\n    learning_rate=learning_rate,\n)","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:41:23.757731Z","iopub.execute_input":"2024-07-06T11:41:23.758089Z","iopub.status.idle":"2024-07-06T11:41:23.869637Z","shell.execute_reply.started":"2024-07-06T11:41:23.758062Z","shell.execute_reply":"2024-07-06T11:41:23.868906Z"},"trusted":true},"execution_count":21,"outputs":[]},{"cell_type":"markdown","source":"Now, we use the trainer object to put all the pieces together.","metadata":{}},{"cell_type":"code","source":"trainer= 
Trainer(\nmodel=model,\nargs=training_args,\ntokenizer=tokenizer,\ncompute_metrics=compute_metrics,\ntrain_dataset=train_dataset,\neval_dataset=valid_dataset,\n)","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:41:32.901266Z","iopub.execute_input":"2024-07-06T11:41:32.901629Z","iopub.status.idle":"2024-07-06T11:41:33.696441Z","shell.execute_reply.started":"2024-07-06T11:41:32.901600Z","shell.execute_reply":"2024-07-06T11:41:33.695694Z"},"trusted":true},"execution_count":22,"outputs":[]},{"cell_type":"markdown","source":"READY to TRAIN..","metadata":{}},{"cell_type":"code","source":"trainer.train()","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:41:53.245016Z","iopub.execute_input":"2024-07-06T11:41:53.245371Z","iopub.status.idle":"2024-07-06T13:35:17.541816Z","shell.execute_reply.started":"2024-07-06T11:41:53.245343Z","shell.execute_reply":"2024-07-06T13:35:17.540756Z"},"trusted":true},"execution_count":23,"outputs":[{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:","output_type":"stream"},{"output_type":"stream","name":"stdin","text":"  路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路路\n"},{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"wandb version 0.17.4 is available!  
To upgrade, please run:\n $ pip install wandb --upgrade"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Tracking run with wandb version 0.17.0"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Run data is saved locally in <code>/kaggle/working/wandb/run-20240706_114220-bb9lw437</code>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Syncing run <strong><a href='https://wandb.ai/almazed9-SEC/huggingface/runs/bb9lw437' target=\"_blank\">bert-base-uncased-finetune-sst2</a></strong> to <a href='https://wandb.ai/almazed9-SEC/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View project at <a href='https://wandb.ai/almazed9-SEC/huggingface' target=\"_blank\">https://wandb.ai/almazed9-SEC/huggingface</a>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View run at <a href='https://wandb.ai/almazed9-SEC/huggingface/runs/bb9lw437' target=\"_blank\">https://wandb.ai/almazed9-SEC/huggingface/runs/bb9lw437</a>"},"metadata":{}},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n  warnings.warn('Was asked to gather along dimension 0, but all '\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n    <div>\n      \n      <progress value='4221' max='4221' style='width:300px; height:20px; vertical-align: middle;'></progress>\n      [4221/4221 1:52:37, Epoch 3/3]\n    
</div>\n    <table border=\"1\" class=\"dataframe\">\n  <thead>\n <tr style=\"text-align: left;\">\n      <th>Epoch</th>\n      <th>Training Loss</th>\n      <th>Validation Loss</th>\n      <th>Accuracy</th>\n      <th>F1</th>\n      <th>Precision</th>\n      <th>Recall</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>1</td>\n      <td>1.009200</td>\n      <td>0.948270</td>\n      <td>0.576900</td>\n      <td>0.571940</td>\n      <td>0.572744</td>\n      <td>0.576900</td>\n    </tr>\n    <tr>\n      <td>2</td>\n      <td>0.883100</td>\n      <td>0.937286</td>\n      <td>0.582600</td>\n      <td>0.581743</td>\n      <td>0.584837</td>\n      <td>0.582600</td>\n    </tr>\n    <tr>\n      <td>3</td>\n      <td>0.773800</td>\n      <td>0.975922</td>\n      <td>0.582000</td>\n      <td>0.580953</td>\n      <td>0.581424</td>\n      <td>0.582000</td>\n    </tr>\n  </tbody>\n</table><p>"},"metadata":{}},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n  warnings.warn('Was asked to gather along dimension 0, but all '\n/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n  warnings.warn('Was asked to gather along dimension 0, but all '\n","output_type":"stream"},{"execution_count":23,"output_type":"execute_result","data":{"text/plain":"TrainOutput(global_step=4221, training_loss=0.8959762511787694, metrics={'train_runtime': 6804.0045, 'train_samples_per_second': 39.683, 'train_steps_per_second': 0.62, 'total_flos': 3.57681111552e+16, 'train_loss': 0.8959762511787694, 'epoch': 3.0})"},"metadata":{}}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}