mazed committed on
Commit
01b66c2
•
1 Parent(s): b66073c

Upload 3 files

Browse files
01. Data Preparation.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
02. Model Training.ipynb ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata":{"kernelspec":{"language":"python","display_name":"Python 3","name":"python3"},"language_info":{"name":"python","version":"3.10.13","mimetype":"text/x-python","codemirror_mode":{"name":"ipython","version":3},"pygments_lexer":"ipython3","nbconvert_exporter":"python","file_extension":".py"},"kaggle":{"accelerator":"nvidiaTeslaT4","dataSources":[],"dockerImageVersionId":30733,"isInternetEnabled":true,"language":"python","sourceType":"notebook","isGpuEnabled":true}},"nbformat_minor":4,"nbformat":4,"cells":[{"cell_type":"markdown","source":"## Load dataset","metadata":{}},{"cell_type":"code","source":"from datasets import load_dataset","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:39:56.584242Z","iopub.execute_input":"2024-07-06T11:39:56.584593Z","iopub.status.idle":"2024-07-06T11:39:58.156952Z","shell.execute_reply.started":"2024-07-06T11:39:56.584562Z","shell.execute_reply":"2024-07-06T11:39:58.155992Z"},"trusted":true},"execution_count":1,"outputs":[]},{"cell_type":"code","source":"shoe_dataset = load_dataset(\"mazed/amazon_shoe_review\")","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:39:58.158713Z","iopub.execute_input":"2024-07-06T11:39:58.159533Z","iopub.status.idle":"2024-07-06T11:40:01.257551Z","shell.execute_reply.started":"2024-07-06T11:39:58.159500Z","shell.execute_reply":"2024-07-06T11:40:01.256701Z"},"trusted":true},"execution_count":2,"outputs":[{"output_type":"display_data","data":{"text/plain":"Downloading readme: 0%| | 0.00/456 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"412edfda83f74b319c351e3e38612e97"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading data: 0%| | 0.00/10.1M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"62223b07878d48c9921a4bba61638cff"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Downloading data: 0%| | 0.00/1.11M 
[00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"c7feddfa7cdd47d08233c3aafd5e35ac"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating train split: 0%| | 0/90000 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"dbc9650a4e904800af19b0af8c2cf71b"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Generating test split: 0%| | 0/10000 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5b187d257eb949faa5c9bd2ffa889fd6"}},"metadata":{}}]},{"cell_type":"code","source":"shoe_dataset","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:01.258601Z","iopub.execute_input":"2024-07-06T11:40:01.258922Z","iopub.status.idle":"2024-07-06T11:40:01.266386Z","shell.execute_reply.started":"2024-07-06T11:40:01.258896Z","shell.execute_reply":"2024-07-06T11:40:01.265351Z"},"trusted":true},"execution_count":3,"outputs":[{"execution_count":3,"output_type":"execute_result","data":{"text/plain":"DatasetDict({\n train: Dataset({\n features: ['labels', 'text'],\n num_rows: 90000\n })\n test: Dataset({\n features: ['labels', 'text'],\n num_rows: 10000\n })\n})"},"metadata":{}}]},{"cell_type":"code","source":"shoe_dataset[\"train\"][0]","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:01.268996Z","iopub.execute_input":"2024-07-06T11:40:01.269313Z","iopub.status.idle":"2024-07-06T11:40:01.307838Z","shell.execute_reply.started":"2024-07-06T11:40:01.269277Z","shell.execute_reply":"2024-07-06T11:40:01.306874Z"},"trusted":true},"execution_count":4,"outputs":[{"execution_count":4,"output_type":"execute_result","data":{"text/plain":"{'labels': 1,\n 'text': \"Material looks cheaper than what I expected. 
Doesn't seem like real quality leather.\"}"},"metadata":{}}]},{"cell_type":"markdown","source":"## Train","metadata":{}},{"cell_type":"code","source":"import transformers","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:01.309006Z","iopub.execute_input":"2024-07-06T11:40:01.309354Z","iopub.status.idle":"2024-07-06T11:40:04.611923Z","shell.execute_reply.started":"2024-07-06T11:40:01.309323Z","shell.execute_reply":"2024-07-06T11:40:04.611040Z"},"trusted":true},"execution_count":5,"outputs":[]},{"cell_type":"code","source":"from transformers import (\nAutoModelForSequenceClassification,\nAutoTokenizer,\nTrainer,\nTrainingArguments,\n)","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:04.613114Z","iopub.execute_input":"2024-07-06T11:40:04.613764Z","iopub.status.idle":"2024-07-06T11:40:16.018456Z","shell.execute_reply.started":"2024-07-06T11:40:04.613728Z","shell.execute_reply":"2024-07-06T11:40:16.017695Z"},"trusted":true},"execution_count":6,"outputs":[{"name":"stderr","text":"2024-07-06 11:40:07.714038: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n2024-07-06 11:40:07.714148: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n2024-07-06 11:40:07.799139: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n","output_type":"stream"}]},{"cell_type":"code","source":"train_dataset = shoe_dataset['train']\nvalid_dataset = 
shoe_dataset['test']","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.019449Z","iopub.execute_input":"2024-07-06T11:40:16.019980Z","iopub.status.idle":"2024-07-06T11:40:16.024606Z","shell.execute_reply.started":"2024-07-06T11:40:16.019955Z","shell.execute_reply":"2024-07-06T11:40:16.023614Z"},"trusted":true},"execution_count":7,"outputs":[]},{"cell_type":"code","source":"train_dataset","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.025896Z","iopub.execute_input":"2024-07-06T11:40:16.026235Z","iopub.status.idle":"2024-07-06T11:40:16.046752Z","shell.execute_reply.started":"2024-07-06T11:40:16.026205Z","shell.execute_reply":"2024-07-06T11:40:16.045870Z"},"trusted":true},"execution_count":8,"outputs":[{"execution_count":8,"output_type":"execute_result","data":{"text/plain":"Dataset({\n features: ['labels', 'text'],\n num_rows: 90000\n})"},"metadata":{}}]},{"cell_type":"code","source":"valid_dataset","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.047662Z","iopub.execute_input":"2024-07-06T11:40:16.047963Z","iopub.status.idle":"2024-07-06T11:40:16.055090Z","shell.execute_reply.started":"2024-07-06T11:40:16.047932Z","shell.execute_reply":"2024-07-06T11:40:16.054115Z"},"trusted":true},"execution_count":9,"outputs":[{"execution_count":9,"output_type":"execute_result","data":{"text/plain":"Dataset({\n features: ['labels', 'text'],\n num_rows: 10000\n})"},"metadata":{}}]},{"cell_type":"markdown","source":"Define a function to compute different metrics.","metadata":{}},{"cell_type":"code","source":"from sklearn.metrics import accuracy_score, 
precision_recall_fscore_support","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.058814Z","iopub.execute_input":"2024-07-06T11:40:16.059106Z","iopub.status.idle":"2024-07-06T11:40:16.065546Z","shell.execute_reply.started":"2024-07-06T11:40:16.059084Z","shell.execute_reply":"2024-07-06T11:40:16.064790Z"},"trusted":true},"execution_count":10,"outputs":[]},{"cell_type":"code","source":"def compute_metrics(pred):\n labels = pred.label_ids\n preds = pred.predictions.argmax(-1)\n precision, recall, f1, _ = precision_recall_fscore_support(labels, preds, average='weighted')\n acc = accuracy_score(labels, preds)\n return {\"accuracy\": acc, \"f1\":f1, \"precision\": precision, \"recall\": recall}","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.066824Z","iopub.execute_input":"2024-07-06T11:40:16.067093Z","iopub.status.idle":"2024-07-06T11:40:16.073528Z","shell.execute_reply.started":"2024-07-06T11:40:16.067071Z","shell.execute_reply":"2024-07-06T11:40:16.072862Z"},"trusted":true},"execution_count":11,"outputs":[]},{"cell_type":"markdown","source":"Let's download the base model and its tokenizer from the Hugging Face Hub.","metadata":{}},{"cell_type":"code","source":"base_model_id = \"distilbert-base-uncased\"\nnum_labels = 5","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.074639Z","iopub.execute_input":"2024-07-06T11:40:16.075438Z","iopub.status.idle":"2024-07-06T11:40:16.081021Z","shell.execute_reply.started":"2024-07-06T11:40:16.075406Z","shell.execute_reply":"2024-07-06T11:40:16.079893Z"},"trusted":true},"execution_count":12,"outputs":[]},{"cell_type":"code","source":"model = AutoModelForSequenceClassification.from_pretrained(base_model_id, num_labels=num_labels)\ntokenizer = 
AutoTokenizer.from_pretrained(base_model_id)","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:16.082025Z","iopub.execute_input":"2024-07-06T11:40:16.082303Z","iopub.status.idle":"2024-07-06T11:40:20.918564Z","shell.execute_reply.started":"2024-07-06T11:40:16.082281Z","shell.execute_reply":"2024-07-06T11:40:20.917793Z"},"trusted":true},"execution_count":13,"outputs":[{"output_type":"display_data","data":{"text/plain":"config.json: 0%| | 0.00/483 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"0c6c35761e27415ab0d1b26978d516f7"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"model.safetensors: 0%| | 0.00/268M [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"1813d959f4414f529de98a5f1246c001"}},"metadata":{}},{"name":"stderr","text":"Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']\nYou should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"tokenizer_config.json: 0%| | 0.00/48.0 [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"71fe18fee3de41778a89582ab748aa13"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"vocab.txt: 0%| | 0.00/232k [00:00<?, ?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"10bef65cc9064fafa0f47bc9fdb0584b"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"tokenizer.json: 0%| | 0.00/466k [00:00<?, 
?B/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"5c70807dcacc4e749cf2cb69f92df18c"}},"metadata":{}}]},{"cell_type":"code","source":"sample_text=\"This is a sample text.\"","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:20.919731Z","iopub.execute_input":"2024-07-06T11:40:20.920023Z","iopub.status.idle":"2024-07-06T11:40:20.924181Z","shell.execute_reply.started":"2024-07-06T11:40:20.919999Z","shell.execute_reply":"2024-07-06T11:40:20.923129Z"},"trusted":true},"execution_count":14,"outputs":[]},{"cell_type":"code","source":"encoded_sample_text=tokenizer(sample_text)","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:20.925200Z","iopub.execute_input":"2024-07-06T11:40:20.925780Z","iopub.status.idle":"2024-07-06T11:40:20.935207Z","shell.execute_reply.started":"2024-07-06T11:40:20.925753Z","shell.execute_reply":"2024-07-06T11:40:20.934371Z"},"trusted":true},"execution_count":15,"outputs":[]},{"cell_type":"code","source":"encoded_sample_text","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:20.936375Z","iopub.execute_input":"2024-07-06T11:40:20.937028Z","iopub.status.idle":"2024-07-06T11:40:20.943328Z","shell.execute_reply.started":"2024-07-06T11:40:20.936997Z","shell.execute_reply":"2024-07-06T11:40:20.942359Z"},"trusted":true},"execution_count":16,"outputs":[{"execution_count":16,"output_type":"execute_result","data":{"text/plain":"{'input_ids': [101, 2023, 2003, 1037, 7099, 3793, 1012, 102], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]}"},"metadata":{}}]},{"cell_type":"markdown","source":"Now, we define a function to tokenize the datasets.","metadata":{}},{"cell_type":"code","source":"def tokenize(batch):\n return tokenizer(batch['text'], padding='max_length', 
truncation=True)\n","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:20.944499Z","iopub.execute_input":"2024-07-06T11:40:20.945119Z","iopub.status.idle":"2024-07-06T11:40:20.949798Z","shell.execute_reply.started":"2024-07-06T11:40:20.945088Z","shell.execute_reply":"2024-07-06T11:40:20.948821Z"},"trusted":true},"execution_count":17,"outputs":[]},{"cell_type":"code","source":"# # Define the tokenize function\n# def tokenize(batch):\n# try:\n# # Tokenize the text and return the tokenized output\n# tokenized = tokenizer(batch['text'], padding='max_length', truncation=True)\n# return tokenized\n# except Exception as e:\n# print(f\"Error tokenizing batch: {e}\")\n# print(batch['text'])\n# return {}","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:20.950989Z","iopub.execute_input":"2024-07-06T11:40:20.951346Z","iopub.status.idle":"2024-07-06T11:40:20.958513Z","shell.execute_reply.started":"2024-07-06T11:40:20.951317Z","shell.execute_reply":"2024-07-06T11:40:20.957759Z"},"trusted":true},"execution_count":18,"outputs":[]},{"cell_type":"code","source":"train_dataset = train_dataset.map(tokenize, batched=True, batch_size=len(train_dataset))\nvalid_dataset = valid_dataset.map(tokenize, batched=True, batch_size=len(valid_dataset))","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:40:20.959481Z","iopub.execute_input":"2024-07-06T11:40:20.959791Z","iopub.status.idle":"2024-07-06T11:40:58.433932Z","shell.execute_reply.started":"2024-07-06T11:40:20.959768Z","shell.execute_reply":"2024-07-06T11:40:58.433027Z"},"trusted":true},"execution_count":19,"outputs":[{"output_type":"display_data","data":{"text/plain":"Map: 0%| | 0/90000 [00:00<?, ? examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"485dc457c24d4c7db98150fe42bd77f0"}},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"Map: 0%| | 0/10000 [00:00<?, ? 
examples/s]","application/vnd.jupyter.widget-view+json":{"version_major":2,"version_minor":0,"model_id":"21553dd3952c4e9eb4e8d3c3888f46d8"}},"metadata":{}}]},{"cell_type":"markdown","source":"Define the TrainingArguments for our training job: hyperparameters, where to save the model, etc.","metadata":{}},{"cell_type":"code","source":"epochs = 3\nlearning_rate = 5e-5\ntrain_batch_size = 32\neval_batch_size = 32\nsave_strategy = 'epoch'","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:41:07.493826Z","iopub.execute_input":"2024-07-06T11:41:07.494563Z","iopub.status.idle":"2024-07-06T11:41:07.499061Z","shell.execute_reply.started":"2024-07-06T11:41:07.494532Z","shell.execute_reply":"2024-07-06T11:41:07.497915Z"},"trusted":true},"execution_count":20,"outputs":[]},{"cell_type":"code","source":"training_args = TrainingArguments(\noutput_dir=\"/kaggle/working/\",\nrun_name=\"bert-base-uncased-finetune-sst2\",\nnum_train_epochs=epochs,\nper_device_train_batch_size=train_batch_size,\nper_device_eval_batch_size=eval_batch_size,\nsave_strategy=save_strategy,\neval_strategy='epoch',\nlearning_rate=learning_rate,\n)","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:41:23.757731Z","iopub.execute_input":"2024-07-06T11:41:23.758089Z","iopub.status.idle":"2024-07-06T11:41:23.869637Z","shell.execute_reply.started":"2024-07-06T11:41:23.758062Z","shell.execute_reply":"2024-07-06T11:41:23.868906Z"},"trusted":true},"execution_count":21,"outputs":[]},{"cell_type":"markdown","source":"Now, we use the trainer object to put all the pieces together.","metadata":{}},{"cell_type":"code","source":"trainer= 
Trainer(\nmodel=model,\nargs=training_args,\ntokenizer=tokenizer,\ncompute_metrics=compute_metrics,\ntrain_dataset=train_dataset,\neval_dataset=valid_dataset,\n)","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:41:32.901266Z","iopub.execute_input":"2024-07-06T11:41:32.901629Z","iopub.status.idle":"2024-07-06T11:41:33.696441Z","shell.execute_reply.started":"2024-07-06T11:41:32.901600Z","shell.execute_reply":"2024-07-06T11:41:33.695694Z"},"trusted":true},"execution_count":22,"outputs":[]},{"cell_type":"markdown","source":"READY to TRAIN..","metadata":{}},{"cell_type":"code","source":"trainer.train()","metadata":{"execution":{"iopub.status.busy":"2024-07-06T11:41:53.245016Z","iopub.execute_input":"2024-07-06T11:41:53.245371Z","iopub.status.idle":"2024-07-06T13:35:17.541816Z","shell.execute_reply.started":"2024-07-06T11:41:53.245343Z","shell.execute_reply":"2024-07-06T13:35:17.540756Z"},"trusted":true},"execution_count":23,"outputs":[{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n\u001b[34m\u001b[1mwandb\u001b[0m: You can find your API key in your browser here: https://wandb.ai/authorize\n\u001b[34m\u001b[1mwandb\u001b[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:","output_type":"stream"},{"output_type":"stream","name":"stdin","text":" ยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยทยท\n"},{"name":"stderr","text":"\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"wandb version 0.17.4 is available! 
To upgrade, please run:\n $ pip install wandb --upgrade"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Tracking run with wandb version 0.17.0"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Run data is saved locally in <code>/kaggle/working/wandb/run-20240706_114220-bb9lw437</code>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"Syncing run <strong><a href='https://wandb.ai/almazed9-SEC/huggingface/runs/bb9lw437' target=\"_blank\">bert-base-uncased-finetune-sst2</a></strong> to <a href='https://wandb.ai/almazed9-SEC/huggingface' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View project at <a href='https://wandb.ai/almazed9-SEC/huggingface' target=\"_blank\">https://wandb.ai/almazed9-SEC/huggingface</a>"},"metadata":{}},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":" View run at <a href='https://wandb.ai/almazed9-SEC/huggingface/runs/bb9lw437' target=\"_blank\">https://wandb.ai/almazed9-SEC/huggingface/runs/bb9lw437</a>"},"metadata":{}},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n warnings.warn('Was asked to gather along dimension 0, but all '\n","output_type":"stream"},{"output_type":"display_data","data":{"text/plain":"<IPython.core.display.HTML object>","text/html":"\n <div>\n \n <progress value='4221' max='4221' style='width:300px; height:20px; vertical-align: middle;'></progress>\n [4221/4221 1:52:37, Epoch 3/3]\n </div>\n <table 
border=\"1\" class=\"dataframe\">\n <thead>\n <tr style=\"text-align: left;\">\n <th>Epoch</th>\n <th>Training Loss</th>\n <th>Validation Loss</th>\n <th>Accuracy</th>\n <th>F1</th>\n <th>Precision</th>\n <th>Recall</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <td>1</td>\n <td>1.009200</td>\n <td>0.948270</td>\n <td>0.576900</td>\n <td>0.571940</td>\n <td>0.572744</td>\n <td>0.576900</td>\n </tr>\n <tr>\n <td>2</td>\n <td>0.883100</td>\n <td>0.937286</td>\n <td>0.582600</td>\n <td>0.581743</td>\n <td>0.584837</td>\n <td>0.582600</td>\n </tr>\n <tr>\n <td>3</td>\n <td>0.773800</td>\n <td>0.975922</td>\n <td>0.582000</td>\n <td>0.580953</td>\n <td>0.581424</td>\n <td>0.582000</td>\n </tr>\n </tbody>\n</table><p>"},"metadata":{}},{"name":"stderr","text":"/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n warnings.warn('Was asked to gather along dimension 0, but all '\n/opt/conda/lib/python3.10/site-packages/torch/nn/parallel/_functions.py:68: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.\n warnings.warn('Was asked to gather along dimension 0, but all '\n","output_type":"stream"},{"execution_count":23,"output_type":"execute_result","data":{"text/plain":"TrainOutput(global_step=4221, training_loss=0.8959762511787694, metrics={'train_runtime': 6804.0045, 'train_samples_per_second': 39.683, 'train_steps_per_second': 0.62, 'total_flos': 3.57681111552e+16, 'train_loss': 0.8959762511787694, 'epoch': 3.0})"},"metadata":{}}]},{"cell_type":"code","source":"","metadata":{},"execution_count":null,"outputs":[]}]}
03. Save Model to Hub.ipynb ADDED
The diff for this file is too large to render. See raw diff