Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

README.md +95 -0
metadata.json +8 -0
model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
trainer_state.json +1514 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,95 @@

+---
+license: apache-2.0
+base_model: google/vit-base-patch16-224
+tags:
+- Image Regression
+datasets:
+- "LucyintheSky/24-5-10_24-5-17-2000"
+metrics:
+- accuracy
+model-index:
+- name: "24-5-10_24-5-17-2000-pred1"
+  results: []
+---
+# 24-5-10_24-5-17-2000-pred1
+## Image Regression Model
+This model was trained with [Image Regression Model Trainer](https://github.com/TonyAssi/ImageRegression/tree/main). It takes an image as input and outputs a float value.
+```python
+from ImageRegression import predict
+predict(repo_id='LucyintheSky/24-5-10_24-5-17-2000-pred1',image_path='image.jpg')
+```
+---
+## Dataset
+Dataset: LucyintheSky/24-5-10_24-5-17-2000\
+Value Column: 'sales_index'\
+Train Test Split: 0.2
+
+---
+## Training
+Base Model: [google/vit-base-patch16-224](https://huggingface.co/google/vit-base-patch16-224)\
+Epochs: 10\
+Learning Rate: 0.0001
+
+---
+## Usage
+### Download
+```bash
+git clone https://github.com/TonyAssi/ImageRegression.git
+cd ImageRegression
+```
+### Installation
+```bash
+pip install -r requirements.txt
+```
+### Import
+```python
+from ImageRegression import train_model, upload_model, predict
+```
+### Inference (Prediction)
+- **repo_id** 🤗 repo id of the model
+- **image_path** path to image
+```python
+predict(repo_id='LucyintheSky/24-5-10_24-5-17-2000-pred1',
+        image_path='image.jpg')
+```
+The first time this function is called, it will download the safetensors model. Subsequent calls will run faster.
+### Train Model
+- **dataset_id** 🤗 dataset id
+- **value_column_name** column name of prediction values in dataset
+- **test_split** fraction of the dataset held out as the test set in the train/test split (e.g. 0.2)
+- **output_dir** the directory where the checkpoints will be saved
+- **num_train_epochs** training epochs
+- **learning_rate** learning rate
+```python
+train_model(dataset_id='LucyintheSky/24-5-10_24-5-17-2000',
+            value_column_name='sales_index',
+            test_split=0.2,
+            output_dir='./results',
+            num_train_epochs=10,
+            learning_rate=0.0001)
+```
+The trainer will save the checkpoints in the output_dir location. The model.safetensors file contains the trained weights you'll use for inference (prediction).
+### Upload Model
+This function will upload your model to the 🤗 Hub.
+- **model_id** the name to give the model on the 🤗 Hub
+- **token** go [here](https://huggingface.co/settings/tokens) to create a new 🤗 token
+- **checkpoint_dir** checkpoint folder that will be uploaded
+```python
+upload_model(model_id='24-5-10_24-5-17-2000-pred1',
+             token='YOUR_HF_TOKEN',
+             checkpoint_dir='./results/checkpoint-940')
+```

metadata.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "dataset_id": "LucyintheSky/24-5-10_24-5-17-2000",
+    "value_column_name": "sales_index",
+    "test_split": 0.2,
+    "num_train_epochs": 10,
+    "learning_rate": 0.0001,
+    "max_value": 2000
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2e0f910445d999a66a30ca345aedab40494ba7475375297ca92770c2801e0d5e
+size 345583444

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9725d84ce2928bfc8ae737fc16ec219f82ae8811c3d0c866ca06dd85fa7aa73
+size 686557178

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da1db5c227c2000e391e1d225e13a38eda71746be2164bab198c44af9ae0882b
+size 13990

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e301ebd8064bb9799d23981baab716db6ee7302f5ba2efc70a2dd3be85fbcb46
+size 1064

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1514 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 10.0,
+  "eval_steps": 500,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "grad_norm": 7.181795120239258,
+      "learning_rate": 9.95e-05,
+      "loss": 0.5278,
+      "step": 10
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 10.308942794799805,
+      "learning_rate": 9.900000000000001e-05,
+      "loss": 0.085,
+      "step": 20
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 12.229366302490234,
+      "learning_rate": 9.850000000000001e-05,
+      "loss": 0.0915,
+      "step": 30
+    },
+    {
+      "epoch": 0.2,
+      "grad_norm": 2.2935550212860107,
+      "learning_rate": 9.8e-05,
+      "loss": 0.0936,
+      "step": 40
+    },
+    {
+      "epoch": 0.25,
+      "grad_norm": 4.0055365562438965,
+      "learning_rate": 9.75e-05,
+      "loss": 0.0847,
+      "step": 50
+    },
+    {
+      "epoch": 0.3,
+      "grad_norm": 2.5626087188720703,
+      "learning_rate": 9.7e-05,
+      "loss": 0.104,
+      "step": 60
+    },
+    {
+      "epoch": 0.35,
+      "grad_norm": 4.1402435302734375,
+      "learning_rate": 9.65e-05,
+      "loss": 0.0736,
+      "step": 70
+    },
+    {
+      "epoch": 0.4,
+      "grad_norm": 2.0823185443878174,
+      "learning_rate": 9.6e-05,
+      "loss": 0.084,
+      "step": 80
+    },
+    {
+      "epoch": 0.45,
+      "grad_norm": 2.480923891067505,
+      "learning_rate": 9.55e-05,
+      "loss": 0.0854,
+      "step": 90
+    },
+    {
+      "epoch": 0.5,
+      "grad_norm": 6.268786907196045,
+      "learning_rate": 9.5e-05,
+      "loss": 0.0662,
+      "step": 100
+    },
+    {
+      "epoch": 0.55,
+      "grad_norm": 5.55922269821167,
+      "learning_rate": 9.449999999999999e-05,
+      "loss": 0.0651,
+      "step": 110
+    },
+    {
+      "epoch": 0.6,
+      "grad_norm": 6.762533664703369,
+      "learning_rate": 9.4e-05,
+      "loss": 0.0733,
+      "step": 120
+    },
+    {
+      "epoch": 0.65,
+      "grad_norm": 4.081423282623291,
+      "learning_rate": 9.350000000000001e-05,
+      "loss": 0.077,
+      "step": 130
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 0.7577868700027466,
+      "learning_rate": 9.300000000000001e-05,
+      "loss": 0.0683,
+      "step": 140
+    },
+    {
+      "epoch": 0.75,
+      "grad_norm": 2.6608283519744873,
+      "learning_rate": 9.250000000000001e-05,
+      "loss": 0.088,
+      "step": 150
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 1.7237794399261475,
+      "learning_rate": 9.200000000000001e-05,
+      "loss": 0.1042,
+      "step": 160
+    },
+    {
+      "epoch": 0.85,
+      "grad_norm": 1.696759819984436,
+      "learning_rate": 9.15e-05,
+      "loss": 0.0658,
+      "step": 170
+    },
+    {
+      "epoch": 0.9,
+      "grad_norm": 2.3527326583862305,
+      "learning_rate": 9.1e-05,
+      "loss": 0.1189,
+      "step": 180
+    },
+    {
+      "epoch": 0.95,
+      "grad_norm": 8.625548362731934,
+      "learning_rate": 9.05e-05,
+      "loss": 0.0969,
+      "step": 190
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 12.219880104064941,
+      "learning_rate": 9e-05,
+      "loss": 0.0886,
+      "step": 200
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.05805354192852974,
+      "eval_mse": 0.05805354192852974,
+      "eval_runtime": 16.5046,
+      "eval_samples_per_second": 24.236,
+      "eval_steps_per_second": 3.029,
+      "step": 200
+    },
+    {
+      "epoch": 1.05,
+      "grad_norm": 11.77884292602539,
+      "learning_rate": 8.950000000000001e-05,
+      "loss": 0.0646,
+      "step": 210
+    },
+    {
+      "epoch": 1.1,
+      "grad_norm": 4.1980414390563965,
+      "learning_rate": 8.900000000000001e-05,
+      "loss": 0.0852,
+      "step": 220
+    },
+    {
+      "epoch": 1.15,
+      "grad_norm": 0.837498128414154,
+      "learning_rate": 8.850000000000001e-05,
+      "loss": 0.1304,
+      "step": 230
+    },
+    {
+      "epoch": 1.2,
+      "grad_norm": 3.922130584716797,
+      "learning_rate": 8.800000000000001e-05,
+      "loss": 0.0777,
+      "step": 240
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 1.215814471244812,
+      "learning_rate": 8.75e-05,
+      "loss": 0.0479,
+      "step": 250
+    },
+    {
+      "epoch": 1.3,
+      "grad_norm": 6.36415958404541,
+      "learning_rate": 8.7e-05,
+      "loss": 0.0546,
+      "step": 260
+    },
+    {
+      "epoch": 1.35,
+      "grad_norm": 4.46746826171875,
+      "learning_rate": 8.65e-05,
+      "loss": 0.0577,
+      "step": 270
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 1.044565200805664,
+      "learning_rate": 8.6e-05,
+      "loss": 0.0759,
+      "step": 280
+    },
+    {
+      "epoch": 1.45,
+      "grad_norm": 7.3412065505981445,
+      "learning_rate": 8.55e-05,
+      "loss": 0.0662,
+      "step": 290
+    },
+    {
+      "epoch": 1.5,
+      "grad_norm": 2.7329416275024414,
+      "learning_rate": 8.5e-05,
+      "loss": 0.0531,
+      "step": 300
+    },
+    {
+      "epoch": 1.55,
+      "grad_norm": 4.7028093338012695,
+      "learning_rate": 8.450000000000001e-05,
+      "loss": 0.0603,
+      "step": 310
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 2.881957530975342,
+      "learning_rate": 8.4e-05,
+      "loss": 0.0499,
+      "step": 320
+    },
+    {
+      "epoch": 1.65,
+      "grad_norm": 5.929530620574951,
+      "learning_rate": 8.35e-05,
+      "loss": 0.079,
+      "step": 330
+    },
+    {
+      "epoch": 1.7,
+      "grad_norm": 2.1263890266418457,
+      "learning_rate": 8.3e-05,
+      "loss": 0.0779,
+      "step": 340
+    },
+    {
+      "epoch": 1.75,
+      "grad_norm": 6.260400772094727,
+      "learning_rate": 8.25e-05,
+      "loss": 0.0724,
+      "step": 350
+    },
+    {
+      "epoch": 1.8,
+      "grad_norm": 8.152369499206543,
+      "learning_rate": 8.2e-05,
+      "loss": 0.1059,
+      "step": 360
+    },
+    {
+      "epoch": 1.85,
+      "grad_norm": 2.842404842376709,
+      "learning_rate": 8.15e-05,
+      "loss": 0.0552,
+      "step": 370
+    },
+    {
+      "epoch": 1.9,
+      "grad_norm": 6.334738254547119,
+      "learning_rate": 8.1e-05,
+      "loss": 0.0778,
+      "step": 380
+    },
+    {
+      "epoch": 1.95,
+      "grad_norm": 2.1208484172821045,
+      "learning_rate": 8.05e-05,
+      "loss": 0.063,
+      "step": 390
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 3.39943265914917,
+      "learning_rate": 8e-05,
+      "loss": 0.0632,
+      "step": 400
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.09050198644399643,
+      "eval_mse": 0.09050198644399643,
+      "eval_runtime": 16.3832,
+      "eval_samples_per_second": 24.415,
+      "eval_steps_per_second": 3.052,
+      "step": 400
+    },
+    {
+      "epoch": 2.05,
+      "grad_norm": 0.5936372876167297,
+      "learning_rate": 7.950000000000001e-05,
+      "loss": 0.0479,
+      "step": 410
+    },
+    {
+      "epoch": 2.1,
+      "grad_norm": 1.255616307258606,
+      "learning_rate": 7.900000000000001e-05,
+      "loss": 0.0503,
+      "step": 420
+    },
+    {
+      "epoch": 2.15,
+      "grad_norm": 0.4045727252960205,
+      "learning_rate": 7.850000000000001e-05,
+      "loss": 0.0685,
+      "step": 430
+    },
+    {
+      "epoch": 2.2,
+      "grad_norm": 9.539754867553711,
+      "learning_rate": 7.800000000000001e-05,
+      "loss": 0.0599,
+      "step": 440
+    },
+    {
+      "epoch": 2.25,
+      "grad_norm": 5.422886848449707,
+      "learning_rate": 7.75e-05,
+      "loss": 0.0563,
+      "step": 450
+    },
+    {
+      "epoch": 2.3,
+      "grad_norm": 0.7504540085792542,
+      "learning_rate": 7.7e-05,
+      "loss": 0.0445,
+      "step": 460
+    },
+    {
+      "epoch": 2.35,
+      "grad_norm": 1.4322071075439453,
+      "learning_rate": 7.65e-05,
+      "loss": 0.048,
+      "step": 470
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 3.965813636779785,
+      "learning_rate": 7.6e-05,
+      "loss": 0.0584,
+      "step": 480
+    },
+    {
+      "epoch": 2.45,
+      "grad_norm": 1.695728063583374,
+      "learning_rate": 7.55e-05,
+      "loss": 0.0523,
+      "step": 490
+    },
+    {
+      "epoch": 2.5,
+      "grad_norm": 7.276674747467041,
+      "learning_rate": 7.500000000000001e-05,
+      "loss": 0.0581,
+      "step": 500
+    },
+    {
+      "epoch": 2.55,
+      "grad_norm": 1.3874706029891968,
+      "learning_rate": 7.450000000000001e-05,
+      "loss": 0.0552,
+      "step": 510
+    },
+    {
+      "epoch": 2.6,
+      "grad_norm": 2.3332033157348633,
+      "learning_rate": 7.4e-05,
+      "loss": 0.0459,
+      "step": 520
+    },
+    {
+      "epoch": 2.65,
+      "grad_norm": 5.276487350463867,
+      "learning_rate": 7.35e-05,
+      "loss": 0.0552,
+      "step": 530
+    },
+    {
+      "epoch": 2.7,
+      "grad_norm": 1.5198229551315308,
+      "learning_rate": 7.3e-05,
+      "loss": 0.0437,
+      "step": 540
+    },
+    {
+      "epoch": 2.75,
+      "grad_norm": 2.460489273071289,
+      "learning_rate": 7.25e-05,
+      "loss": 0.0641,
+      "step": 550
+    },
+    {
+      "epoch": 2.8,
+      "grad_norm": 4.7155680656433105,
+      "learning_rate": 7.2e-05,
+      "loss": 0.0593,
+      "step": 560
+    },
+    {
+      "epoch": 2.85,
+      "grad_norm": 1.9343897104263306,
+      "learning_rate": 7.15e-05,
+      "loss": 0.0724,
+      "step": 570
+    },
+    {
+      "epoch": 2.9,
+      "grad_norm": 6.80872106552124,
+      "learning_rate": 7.1e-05,
+      "loss": 0.0641,
+      "step": 580
+    },
+    {
+      "epoch": 2.95,
+      "grad_norm": 3.6537954807281494,
+      "learning_rate": 7.05e-05,
+      "loss": 0.0565,
+      "step": 590
+    },
+    {
+      "epoch": 3.0,
+      "grad_norm": 0.5623217225074768,
+      "learning_rate": 7e-05,
+      "loss": 0.0706,
+      "step": 600
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.06641849875450134,
+      "eval_mse": 0.06641849130392075,
+      "eval_runtime": 16.3852,
+      "eval_samples_per_second": 24.412,
+      "eval_steps_per_second": 3.052,
+      "step": 600
+    },
+    {
+      "epoch": 3.05,
+      "grad_norm": 0.36932703852653503,
+      "learning_rate": 6.95e-05,
+      "loss": 0.0335,
+      "step": 610
+    },
+    {
+      "epoch": 3.1,
+      "grad_norm": 6.078632831573486,
+      "learning_rate": 6.9e-05,
+      "loss": 0.0263,
+      "step": 620
+    },
+    {
+      "epoch": 3.15,
+      "grad_norm": 1.8647630214691162,
+      "learning_rate": 6.850000000000001e-05,
+      "loss": 0.0361,
+      "step": 630
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 2.3444764614105225,
+      "learning_rate": 6.800000000000001e-05,
+      "loss": 0.0318,
+      "step": 640
+    },
+    {
+      "epoch": 3.25,
+      "grad_norm": 3.742060899734497,
+      "learning_rate": 6.750000000000001e-05,
+      "loss": 0.0285,
+      "step": 650
+    },
+    {
+      "epoch": 3.3,
+      "grad_norm": 5.139005661010742,
+      "learning_rate": 6.7e-05,
+      "loss": 0.0282,
+      "step": 660
+    },
+    {
+      "epoch": 3.35,
+      "grad_norm": 2.5116076469421387,
+      "learning_rate": 6.65e-05,
+      "loss": 0.0508,
+      "step": 670
+    },
+    {
+      "epoch": 3.4,
+      "grad_norm": 4.835976600646973,
+      "learning_rate": 6.6e-05,
+      "loss": 0.0603,
+      "step": 680
+    },
+    {
+      "epoch": 3.45,
+      "grad_norm": 0.5137421488761902,
+      "learning_rate": 6.55e-05,
+      "loss": 0.0327,
+      "step": 690
+    },
+    {
+      "epoch": 3.5,
+      "grad_norm": 2.6399364471435547,
+      "learning_rate": 6.500000000000001e-05,
+      "loss": 0.0365,
+      "step": 700
+    },
+    {
+      "epoch": 3.55,
+      "grad_norm": 6.157520294189453,
+      "learning_rate": 6.450000000000001e-05,
+      "loss": 0.0298,
+      "step": 710
+    },
+    {
+      "epoch": 3.6,
+      "grad_norm": 4.332380294799805,
+      "learning_rate": 6.400000000000001e-05,
+      "loss": 0.0384,
+      "step": 720
+    },
+    {
+      "epoch": 3.65,
+      "grad_norm": 0.9350706338882446,
+      "learning_rate": 6.35e-05,
+      "loss": 0.0701,
+      "step": 730
+    },
+    {
+      "epoch": 3.7,
+      "grad_norm": 3.732862710952759,
+      "learning_rate": 6.3e-05,
+      "loss": 0.0401,
+      "step": 740
+    },
+    {
+      "epoch": 3.75,
+      "grad_norm": 0.5625308752059937,
+      "learning_rate": 6.25e-05,
+      "loss": 0.0446,
+      "step": 750
+    },
+    {
+      "epoch": 3.8,
+      "grad_norm": 1.4193575382232666,
+      "learning_rate": 6.2e-05,
+      "loss": 0.0484,
+      "step": 760
+    },
+    {
+      "epoch": 3.85,
+      "grad_norm": 3.158599376678467,
+      "learning_rate": 6.15e-05,
+      "loss": 0.0423,
+      "step": 770
+    },
+    {
+      "epoch": 3.9,
+      "grad_norm": 3.7658185958862305,
+      "learning_rate": 6.1e-05,
+      "loss": 0.0474,
+      "step": 780
+    },
+    {
+      "epoch": 3.95,
+      "grad_norm": 0.5808354616165161,
+      "learning_rate": 6.05e-05,
+      "loss": 0.0581,
+      "step": 790
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 3.4763364791870117,
+      "learning_rate": 6e-05,
+      "loss": 0.0262,
+      "step": 800
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.07426866888999939,
+      "eval_mse": 0.07426867634057999,
+      "eval_runtime": 16.4034,
+      "eval_samples_per_second": 24.385,
+      "eval_steps_per_second": 3.048,
+      "step": 800
+    },
+    {
+      "epoch": 4.05,
+      "grad_norm": 5.053278923034668,
+      "learning_rate": 5.95e-05,
+      "loss": 0.0417,
+      "step": 810
+    },
+    {
+      "epoch": 4.1,
+      "grad_norm": 0.44556859135627747,
+      "learning_rate": 5.9e-05,
+      "loss": 0.0251,
+      "step": 820
+    },
+    {
+      "epoch": 4.15,
+      "grad_norm": 1.782357931137085,
+      "learning_rate": 5.85e-05,
+      "loss": 0.0241,
+      "step": 830
+    },
+    {
+      "epoch": 4.2,
+      "grad_norm": 3.3760969638824463,
+      "learning_rate": 5.8e-05,
+      "loss": 0.0475,
+      "step": 840
+    },
+    {
+      "epoch": 4.25,
+      "grad_norm": 1.7383389472961426,
+      "learning_rate": 5.7499999999999995e-05,
+      "loss": 0.0322,
+      "step": 850
+    },
+    {
+      "epoch": 4.3,
+      "grad_norm": 1.0607361793518066,
+      "learning_rate": 5.6999999999999996e-05,
+      "loss": 0.0284,
+      "step": 860
+    },
+    {
+      "epoch": 4.35,
+      "grad_norm": 2.8519768714904785,
+      "learning_rate": 5.65e-05,
+      "loss": 0.0242,
+      "step": 870
+    },
+    {
+      "epoch": 4.4,
+      "grad_norm": 2.405836582183838,
+      "learning_rate": 5.6000000000000006e-05,
+      "loss": 0.0188,
+      "step": 880
+    },
+    {
+      "epoch": 4.45,
+      "grad_norm": 0.362431138753891,
+      "learning_rate": 5.550000000000001e-05,
+      "loss": 0.0239,
+      "step": 890
+    },
+    {
+      "epoch": 4.5,
+      "grad_norm": 3.0968267917633057,
+      "learning_rate": 5.500000000000001e-05,
+      "loss": 0.0259,
+      "step": 900
+    },
+    {
+      "epoch": 4.55,
+      "grad_norm": 1.1966663599014282,
+      "learning_rate": 5.45e-05,
+      "loss": 0.0253,
+      "step": 910
+    },
+    {
+      "epoch": 4.6,
+      "grad_norm": 3.1779186725616455,
+      "learning_rate": 5.4000000000000005e-05,
+      "loss": 0.0383,
+      "step": 920
+    },
+    {
+      "epoch": 4.65,
+      "grad_norm": 9.280014038085938,
+      "learning_rate": 5.3500000000000006e-05,
+      "loss": 0.046,
+      "step": 930
+    },
+    {
+      "epoch": 4.7,
+      "grad_norm": 5.865562915802002,
+      "learning_rate": 5.300000000000001e-05,
+      "loss": 0.0444,
+      "step": 940
+    },
+    {
+      "epoch": 4.75,
+      "grad_norm": 0.8096597790718079,
+      "learning_rate": 5.25e-05,
+      "loss": 0.0254,
+      "step": 950
+    },
+    {
+      "epoch": 4.8,
+      "grad_norm": 2.4865760803222656,
+      "learning_rate": 5.2000000000000004e-05,
+      "loss": 0.0265,
+      "step": 960
+    },
+    {
+      "epoch": 4.85,
+      "grad_norm": 1.709107518196106,
+      "learning_rate": 5.1500000000000005e-05,
+      "loss": 0.0207,
+      "step": 970
+    },
+    {
+      "epoch": 4.9,
+      "grad_norm": 4.747660160064697,
+      "learning_rate": 5.1000000000000006e-05,
+      "loss": 0.0238,
+      "step": 980
+    },
+    {
+      "epoch": 4.95,
+      "grad_norm": 2.693565845489502,
+      "learning_rate": 5.05e-05,
+      "loss": 0.0273,
+      "step": 990
+    },
+    {
+      "epoch": 5.0,
+      "grad_norm": 1.1513164043426514,
+      "learning_rate": 5e-05,
+      "loss": 0.0254,
+      "step": 1000
+    },
+    {
+      "epoch": 5.0,
+      "eval_loss": 0.06622488796710968,
+      "eval_mse": 0.06622488796710968,
+      "eval_runtime": 16.3594,
+      "eval_samples_per_second": 24.451,
+      "eval_steps_per_second": 3.056,
+      "step": 1000
+    },
+    {
+      "epoch": 5.05,
+      "grad_norm": 1.512947678565979,
+      "learning_rate": 4.9500000000000004e-05,
+      "loss": 0.0165,
+      "step": 1010
+    },
+    {
+      "epoch": 5.1,
+      "grad_norm": 2.5008397102355957,
+      "learning_rate": 4.9e-05,
+      "loss": 0.0133,
+      "step": 1020
+    },
+    {
+      "epoch": 5.15,
+      "grad_norm": 2.0179872512817383,
+      "learning_rate": 4.85e-05,
+      "loss": 0.0202,
+      "step": 1030
+    },
+    {
+      "epoch": 5.2,
+      "grad_norm": 3.1750917434692383,
+      "learning_rate": 4.8e-05,
+      "loss": 0.0154,
+      "step": 1040
+    },
+    {
+      "epoch": 5.25,
+      "grad_norm": 2.2271130084991455,
+      "learning_rate": 4.75e-05,
+      "loss": 0.0247,
+      "step": 1050
+    },
+    {
+      "epoch": 5.3,
+      "grad_norm": 3.316953182220459,
+      "learning_rate": 4.7e-05,
+      "loss": 0.0185,
+      "step": 1060
+    },
+    {
+      "epoch": 5.35,
+      "grad_norm": 0.9691615104675293,
+      "learning_rate": 4.6500000000000005e-05,
+      "loss": 0.0183,
+      "step": 1070
+    },
+    {
+      "epoch": 5.4,
+      "grad_norm": 2.1814730167388916,
+      "learning_rate": 4.600000000000001e-05,
+      "loss": 0.0127,
+      "step": 1080
+    },
+    {
+      "epoch": 5.45,
+      "grad_norm": 0.5520786643028259,
+      "learning_rate": 4.55e-05,
+      "loss": 0.0134,
+      "step": 1090
+    },
+    {
+      "epoch": 5.5,
+      "grad_norm": 1.098179578781128,
+      "learning_rate": 4.5e-05,
+      "loss": 0.0136,
+      "step": 1100
+    },
+    {
+      "epoch": 5.55,
+      "grad_norm": 2.0275049209594727,
+      "learning_rate": 4.4500000000000004e-05,
+      "loss": 0.0186,
+      "step": 1110
+    },
+    {
+      "epoch": 5.6,
+      "grad_norm": 0.6353259086608887,
+      "learning_rate": 4.4000000000000006e-05,
+      "loss": 0.0105,
+      "step": 1120
+    },
+    {
+      "epoch": 5.65,
+      "grad_norm": 0.8149334192276001,
+      "learning_rate": 4.35e-05,
+      "loss": 0.0124,
+      "step": 1130
+    },
+    {
+      "epoch": 5.7,
+      "grad_norm": 0.48699653148651123,
+      "learning_rate": 4.3e-05,
+      "loss": 0.0165,
+      "step": 1140
+    },
+    {
+      "epoch": 5.75,
+      "grad_norm": 1.4988057613372803,
+      "learning_rate": 4.25e-05,
+      "loss": 0.0131,
+      "step": 1150
+    },
+    {
+      "epoch": 5.8,
+      "grad_norm": 1.8789329528808594,
+      "learning_rate": 4.2e-05,
+      "loss": 0.0139,
+      "step": 1160
+    },
+    {
+      "epoch": 5.85,
+      "grad_norm": 0.5368540287017822,
+      "learning_rate": 4.15e-05,
+      "loss": 0.0127,
+      "step": 1170
+    },
+    {
+      "epoch": 5.9,
+      "grad_norm": 1.9691675901412964,
+      "learning_rate": 4.1e-05,
+      "loss": 0.0154,
+      "step": 1180
+    },
+    {
+      "epoch": 5.95,
+      "grad_norm": 1.972278356552124,
+      "learning_rate": 4.05e-05,
+      "loss": 0.0134,
+      "step": 1190
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 0.720190167427063,
+      "learning_rate": 4e-05,
+      "loss": 0.0158,
+      "step": 1200
+    },
+    {
+      "epoch": 6.0,
+      "eval_loss": 0.061197929084300995,
+      "eval_mse": 0.061197929084300995,
+      "eval_runtime": 16.3287,
+      "eval_samples_per_second": 24.497,
+      "eval_steps_per_second": 3.062,
+      "step": 1200
+    },
+    {
+      "epoch": 6.05,
+      "grad_norm": 0.4566974937915802,
+      "learning_rate": 3.9500000000000005e-05,
+      "loss": 0.0069,
+      "step": 1210
+    },
+    {
+      "epoch": 6.1,
+      "grad_norm": 3.2127201557159424,
+      "learning_rate": 3.9000000000000006e-05,
+      "loss": 0.0113,
+      "step": 1220
+    },
+    {
+      "epoch": 6.15,
+      "grad_norm": 3.4719629287719727,
+      "learning_rate": 3.85e-05,
+      "loss": 0.0125,
+      "step": 1230
+    },
+    {
+      "epoch": 6.2,
+      "grad_norm": 0.5567223429679871,
+      "learning_rate": 3.8e-05,
+      "loss": 0.011,
+      "step": 1240
+    },
+    {
+      "epoch": 6.25,
+      "grad_norm": 1.591753363609314,
+      "learning_rate": 3.7500000000000003e-05,
+      "loss": 0.0099,
+      "step": 1250
+    },
+    {
+      "epoch": 6.3,
+      "grad_norm": 2.333648681640625,
+      "learning_rate": 3.7e-05,
+      "loss": 0.0106,
+      "step": 1260
+    },
+    {
+      "epoch": 6.35,
+      "grad_norm": 0.5757361650466919,
+      "learning_rate": 3.65e-05,
+      "loss": 0.0066,
+      "step": 1270
+    },
+    {
+      "epoch": 6.4,
+      "grad_norm": 0.1848592758178711,
+      "learning_rate": 3.6e-05,
+      "loss": 0.0075,
+      "step": 1280
+    },
+    {
+      "epoch": 6.45,
+      "grad_norm": 2.7622129917144775,
+      "learning_rate": 3.55e-05,
+      "loss": 0.0064,
+      "step": 1290
+    },
+    {
+      "epoch": 6.5,
+      "grad_norm": 2.2141900062561035,
+      "learning_rate": 3.5e-05,
+      "loss": 0.0058,
+      "step": 1300
+    },
+    {
+      "epoch": 6.55,
+      "grad_norm": 2.2228360176086426,
+      "learning_rate": 3.45e-05,
+      "loss": 0.0075,
+      "step": 1310
+    },
+    {
+      "epoch": 6.6,
+      "grad_norm": 1.4335148334503174,
+      "learning_rate": 3.4000000000000007e-05,
+      "loss": 0.0049,
+      "step": 1320
+    },
+    {
+      "epoch": 6.65,
+      "grad_norm": 5.4220781326293945,
+      "learning_rate": 3.35e-05,
+      "loss": 0.0097,
+      "step": 1330
+    },
+    {
+      "epoch": 6.7,
+      "grad_norm": 3.363415479660034,
+      "learning_rate": 3.3e-05,
+      "loss": 0.011,
+      "step": 1340
+    },
+    {
+      "epoch": 6.75,
+      "grad_norm": 0.620877742767334,
+      "learning_rate": 3.2500000000000004e-05,
+      "loss": 0.0085,
+      "step": 1350
+    },
+    {
+      "epoch": 6.8,
+      "grad_norm": 0.262657105922699,
+      "learning_rate": 3.2000000000000005e-05,
+      "loss": 0.0063,
+      "step": 1360
+    },
+    {
+      "epoch": 6.85,
+      "grad_norm": 0.18443162739276886,
+      "learning_rate": 3.15e-05,
+      "loss": 0.0056,
+      "step": 1370
+    },
+    {
+      "epoch": 6.9,
+      "grad_norm": 1.5864096879959106,
+      "learning_rate": 3.1e-05,
+      "loss": 0.0078,
+      "step": 1380
+    },
+    {
+      "epoch": 6.95,
+      "grad_norm": 0.2312205284833908,
+      "learning_rate": 3.05e-05,
+      "loss": 0.0078,
+      "step": 1390
+    },
+    {
+      "epoch": 7.0,
+      "grad_norm": 1.5744479894638062,
+      "learning_rate": 3e-05,
+      "loss": 0.0082,
+      "step": 1400
+    },
+    {
+      "epoch": 7.0,
+      "eval_loss": 0.05860908329486847,
+      "eval_mse": 0.058609090745449066,
+      "eval_runtime": 16.3726,
+      "eval_samples_per_second": 24.431,
+      "eval_steps_per_second": 3.054,
+      "step": 1400
+    },
+    {
+      "epoch": 7.05,
+      "grad_norm": 0.41979604959487915,
+      "learning_rate": 2.95e-05,
+      "loss": 0.0029,
+      "step": 1410
+    },
+    {
+      "epoch": 7.1,
+      "grad_norm": 1.5260108709335327,
+      "learning_rate": 2.9e-05,
+      "loss": 0.0048,
+      "step": 1420
+    },
+    {
+      "epoch": 7.15,
+      "grad_norm": 1.438675880432129,
+      "learning_rate": 2.8499999999999998e-05,
+      "loss": 0.0051,
+      "step": 1430
+    },
+    {
+      "epoch": 7.2,
+      "grad_norm": 1.684127688407898,
+      "learning_rate": 2.8000000000000003e-05,
+      "loss": 0.0041,
+      "step": 1440
+    },
+    {
+      "epoch": 7.25,
+      "grad_norm": 0.2706861197948456,
+      "learning_rate": 2.7500000000000004e-05,
+      "loss": 0.0035,
+      "step": 1450
+    },
+    {
+      "epoch": 7.3,
+      "grad_norm": 2.0461597442626953,
+      "learning_rate": 2.7000000000000002e-05,
+      "loss": 0.0052,
+      "step": 1460
+    },
+    {
+      "epoch": 7.35,
+      "grad_norm": 0.6794195771217346,
+      "learning_rate": 2.6500000000000004e-05,
+      "loss": 0.003,
+      "step": 1470
+    },
+    {
+      "epoch": 7.4,
+      "grad_norm": 2.5439083576202393,
+      "learning_rate": 2.6000000000000002e-05,
+      "loss": 0.0041,
+      "step": 1480
+    },
+    {
+      "epoch": 7.45,
+      "grad_norm": 0.684155285358429,
+      "learning_rate": 2.5500000000000003e-05,
+      "loss": 0.0043,
+      "step": 1490
+    },
+    {
+      "epoch": 7.5,
+      "grad_norm": 0.2224631905555725,
+      "learning_rate": 2.5e-05,
+      "loss": 0.0047,
+      "step": 1500
+    },
+    {
+      "epoch": 7.55,
+      "grad_norm": 0.820940375328064,
+      "learning_rate": 2.45e-05,
+      "loss": 0.0028,
+      "step": 1510
+    },
+    {
+      "epoch": 7.6,
+      "grad_norm": 0.22735460102558136,
+      "learning_rate": 2.4e-05,
+      "loss": 0.0043,
+      "step": 1520
+    },
+    {
+      "epoch": 7.65,
+      "grad_norm": 0.598775327205658,
+      "learning_rate": 2.35e-05,
+      "loss": 0.0035,
+      "step": 1530
+    },
+    {
+      "epoch": 7.7,
+      "grad_norm": 0.42806899547576904,
+      "learning_rate": 2.3000000000000003e-05,
+      "loss": 0.0038,
+      "step": 1540
+    },
+    {
+      "epoch": 7.75,
+      "grad_norm": 0.46802547574043274,
+      "learning_rate": 2.25e-05,
+      "loss": 0.003,
+      "step": 1550
+    },
+    {
+      "epoch": 7.8,
+      "grad_norm": 1.4230495691299438,
+      "learning_rate": 2.2000000000000003e-05,
+      "loss": 0.0043,
+      "step": 1560
+    },
+    {
+      "epoch": 7.85,
+      "grad_norm": 1.492274522781372,
+      "learning_rate": 2.15e-05,
+      "loss": 0.004,
+      "step": 1570
+    },
+    {
+      "epoch": 7.9,
+      "grad_norm": 0.22267311811447144,
+      "learning_rate": 2.1e-05,
+      "loss": 0.0034,
+      "step": 1580
+    },
+    {
+      "epoch": 7.95,
+      "grad_norm": 1.372755527496338,
+      "learning_rate": 2.05e-05,
+      "loss": 0.0041,
+      "step": 1590
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 0.3921973407268524,
+      "learning_rate": 2e-05,
+      "loss": 0.004,
+      "step": 1600
+    },
+    {
+      "epoch": 8.0,
+      "eval_loss": 0.058916158974170685,
+      "eval_mse": 0.058916158974170685,
+      "eval_runtime": 16.4617,
+      "eval_samples_per_second": 24.299,
+      "eval_steps_per_second": 3.037,
+      "step": 1600
+    },
+    {
+      "epoch": 8.05,
+      "grad_norm": 0.9684560298919678,
+      "learning_rate": 1.9500000000000003e-05,
+      "loss": 0.0015,
+      "step": 1610
+    },
+    {
+      "epoch": 8.1,
+      "grad_norm": 0.6896412372589111,
+      "learning_rate": 1.9e-05,
+      "loss": 0.0012,
+      "step": 1620
+    },
+    {
+      "epoch": 8.15,
+      "grad_norm": 0.0872649997472763,
+      "learning_rate": 1.85e-05,
+      "loss": 0.0013,
+      "step": 1630
+    },
+    {
+      "epoch": 8.2,
+      "grad_norm": 0.639758288860321,
+      "learning_rate": 1.8e-05,
+      "loss": 0.0023,
+      "step": 1640
+    },
+    {
+      "epoch": 8.25,
+      "grad_norm": 0.17185546457767487,
+      "learning_rate": 1.75e-05,
+      "loss": 0.0013,
+      "step": 1650
+    },
+    {
+      "epoch": 8.3,
+      "grad_norm": 1.2242165803909302,
+      "learning_rate": 1.7000000000000003e-05,
+      "loss": 0.0016,
+      "step": 1660
+    },
+    {
+      "epoch": 8.35,
+      "grad_norm": 0.9778568148612976,
+      "learning_rate": 1.65e-05,
+      "loss": 0.0014,
+      "step": 1670
+    },
+    {
+      "epoch": 8.4,
+      "grad_norm": 0.3234766721725464,
+      "learning_rate": 1.6000000000000003e-05,
+      "loss": 0.001,
+      "step": 1680
+    },
+    {
+      "epoch": 8.45,
+      "grad_norm": 1.149452805519104,
+      "learning_rate": 1.55e-05,
+      "loss": 0.0014,
+      "step": 1690
+    },
+    {
+      "epoch": 8.5,
+      "grad_norm": 0.6951824426651001,
+      "learning_rate": 1.5e-05,
+      "loss": 0.0013,
+      "step": 1700
+    },
+    {
+      "epoch": 8.55,
+      "grad_norm": 0.7042409181594849,
+      "learning_rate": 1.45e-05,
+      "loss": 0.0016,
+      "step": 1710
+    },
+    {
+      "epoch": 8.6,
+      "grad_norm": 0.1889318823814392,
+      "learning_rate": 1.4000000000000001e-05,
+      "loss": 0.0014,
+      "step": 1720
+    },
+    {
+      "epoch": 8.65,
+      "grad_norm": 0.9096256494522095,
+      "learning_rate": 1.3500000000000001e-05,
+      "loss": 0.0016,
+      "step": 1730
+    },
+    {
+      "epoch": 8.7,
+      "grad_norm": 0.3144957721233368,
+      "learning_rate": 1.3000000000000001e-05,
+      "loss": 0.001,
+      "step": 1740
+    },
+    {
+      "epoch": 8.75,
+      "grad_norm": 0.6703792810440063,
+      "learning_rate": 1.25e-05,
+      "loss": 0.0017,
+      "step": 1750
+    },
+    {
+      "epoch": 8.8,
+      "grad_norm": 0.6046918034553528,
+      "learning_rate": 1.2e-05,
+      "loss": 0.0015,
+      "step": 1760
+    },
+    {
+      "epoch": 8.85,
+      "grad_norm": 0.3708367645740509,
+      "learning_rate": 1.1500000000000002e-05,
+      "loss": 0.0012,
+      "step": 1770
+    },
+    {
+      "epoch": 8.9,
+      "grad_norm": 1.035162329673767,
+      "learning_rate": 1.1000000000000001e-05,
+      "loss": 0.0013,
+      "step": 1780
+    },
+    {
+      "epoch": 8.95,
+      "grad_norm": 0.273884117603302,
+      "learning_rate": 1.05e-05,
+      "loss": 0.0013,
+      "step": 1790
+    },
+    {
+      "epoch": 9.0,
+      "grad_norm": 0.5523194074630737,
+      "learning_rate": 1e-05,
+      "loss": 0.0017,
+      "step": 1800
+    },
+    {
+      "epoch": 9.0,
+      "eval_loss": 0.05794011056423187,
+      "eval_mse": 0.05794011056423187,
+      "eval_runtime": 16.743,
+      "eval_samples_per_second": 23.891,
+      "eval_steps_per_second": 2.986,
+      "step": 1800
+    },
+    {
+      "epoch": 9.05,
+      "grad_norm": 0.26945334672927856,
+      "learning_rate": 9.5e-06,
+      "loss": 0.0009,
+      "step": 1810
+    },
+    {
+      "epoch": 9.1,
+      "grad_norm": 0.4931265711784363,
+      "learning_rate": 9e-06,
+      "loss": 0.0005,
+      "step": 1820
+    },
+    {
+      "epoch": 9.15,
+      "grad_norm": 0.17124253511428833,
+      "learning_rate": 8.500000000000002e-06,
+      "loss": 0.0005,
+      "step": 1830
+    },
+    {
+      "epoch": 9.2,
+      "grad_norm": 0.5681818723678589,
+      "learning_rate": 8.000000000000001e-06,
+      "loss": 0.0005,
+      "step": 1840
+    },
+    {
+      "epoch": 9.25,
+      "grad_norm": 0.5338633060455322,
+      "learning_rate": 7.5e-06,
+      "loss": 0.0004,
+      "step": 1850
+    },
+    {
+      "epoch": 9.3,
+      "grad_norm": 0.28590112924575806,
+      "learning_rate": 7.000000000000001e-06,
+      "loss": 0.0004,
+      "step": 1860
+    },
+    {
+      "epoch": 9.35,
+      "grad_norm": 0.047065772116184235,
+      "learning_rate": 6.5000000000000004e-06,
+      "loss": 0.0003,
+      "step": 1870
+    },
+    {
+      "epoch": 9.4,
+      "grad_norm": 0.1954226791858673,
+      "learning_rate": 6e-06,
+      "loss": 0.0003,
+      "step": 1880
+    },
+    {
+      "epoch": 9.45,
+      "grad_norm": 0.20763620734214783,
+      "learning_rate": 5.500000000000001e-06,
+      "loss": 0.0005,
+      "step": 1890
+    },
+    {
+      "epoch": 9.5,
+      "grad_norm": 0.10486303269863129,
+      "learning_rate": 5e-06,
+      "loss": 0.0004,
+      "step": 1900
+    },
+    {
+      "epoch": 9.55,
+      "grad_norm": 0.09675983339548111,
+      "learning_rate": 4.5e-06,
+      "loss": 0.0007,
+      "step": 1910
+    },
+    {
+      "epoch": 9.6,
+      "grad_norm": 0.12553900480270386,
+      "learning_rate": 4.000000000000001e-06,
+      "loss": 0.0004,
+      "step": 1920
+    },
+    {
+      "epoch": 9.65,
+      "grad_norm": 0.300682008266449,
+      "learning_rate": 3.5000000000000004e-06,
+      "loss": 0.0004,
+      "step": 1930
+    },
+    {
+      "epoch": 9.7,
+      "grad_norm": 0.2784073054790497,
+      "learning_rate": 3e-06,
+      "loss": 0.0004,
+      "step": 1940
+    },
+    {
+      "epoch": 9.75,
+      "grad_norm": 0.21158581972122192,
+      "learning_rate": 2.5e-06,
+      "loss": 0.0004,
+      "step": 1950
+    },
+    {
+      "epoch": 9.8,
+      "grad_norm": 0.36177006363868713,
+      "learning_rate": 2.0000000000000003e-06,
+      "loss": 0.0004,
+      "step": 1960
+    },
+    {
+      "epoch": 9.85,
+      "grad_norm": 0.07186894863843918,
+      "learning_rate": 1.5e-06,
+      "loss": 0.0004,
+      "step": 1970
+    },
+    {
+      "epoch": 9.9,
+      "grad_norm": 0.07330793887376785,
+      "learning_rate": 1.0000000000000002e-06,
+      "loss": 0.0005,
+      "step": 1980
+    },
+    {
+      "epoch": 9.95,
+      "grad_norm": 0.1601114273071289,
+      "learning_rate": 5.000000000000001e-07,
+      "loss": 0.0005,
+      "step": 1990
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.520894467830658,
+      "learning_rate": 0.0,
+      "loss": 0.0005,
+      "step": 2000
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 2000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 10,
+  "save_steps": 10,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b480aac25c4b78f1cc3dabceb4e63961ccbdce5223bd2dfa17bcab947ac96b9e
+size 5048