sanchit-gandhi (HF staff) committed
Commit ae4b074
1 Parent(s): 7865806

Model save
README.md ADDED
@@ -0,0 +1,69 @@
+ ---
+ base_model: sanchit-gandhi/Mistral-7B-v0.1-6-layer
+ tags:
+ - trl
+ - sft
+ - generated_from_trainer
+ datasets:
+ - generator
+ model-index:
+ - name: sanchit-gandhi/Mistral-7B-v0.1-6-layer
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # sanchit-gandhi/Mistral-7B-v0.1-6-layer
+
+ This model is a fine-tuned version of [sanchit-gandhi/Mistral-7B-v0.1-6-layer](https://huggingface.co/sanchit-gandhi/Mistral-7B-v0.1-6-layer) on the generator dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 2.1183
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 0.0003
+ - train_batch_size: 64
+ - eval_batch_size: 32
+ - seed: 42
+ - distributed_type: multi-GPU
+ - num_devices: 8
+ - total_train_batch_size: 512
+ - total_eval_batch_size: 256
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 5
+
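A quick check on the schedule: the learning rates logged in trainer_state.json match linear warmup followed by cosine decay to zero, if the warmup length is assumed to be rounded up to ceil(0.1 × 1365) = 137 steps (the Hugging Face Trainer convention). A minimal sketch using only the constants above:

```python
import math

MAX_STEPS = 1365     # global_step at the end of training
PEAK_LR = 3e-4       # learning_rate
WARMUP_RATIO = 0.1   # lr_scheduler_warmup_ratio
# Assumption: warmup steps are rounded up, as in the HF Trainer's get_warmup_steps.
WARMUP_STEPS = math.ceil(WARMUP_RATIO * MAX_STEPS)  # 137

def lr_at(step: int) -> float:
    """Linear warmup to PEAK_LR, then cosine decay to zero."""
    if step < WARMUP_STEPS:
        return PEAK_LR * step / WARMUP_STEPS
    progress = (step - WARMUP_STEPS) / (MAX_STEPS - WARMUP_STEPS)
    return PEAK_LR * 0.5 * (1.0 + math.cos(math.pi * progress))
```

Evaluated at steps 1, 270, and 1360, this reproduces the `learning_rate` values recorded in trainer_state.json to high precision.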
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:----:|:---------------:|
+ | 4.8342 | 1.0 | 273 | 4.7379 |
+ | 3.3301 | 2.0 | 546 | 3.2846 |
+ | 2.4158 | 3.0 | 819 | 2.4134 |
+ | 2.1322 | 4.0 | 1092 | 2.1637 |
+ | 2.0369 | 5.0 | 1365 | 2.1183 |
+
+
+ ### Framework versions
+
+ - Transformers 4.36.2
+ - Pytorch 2.1.2
+ - Datasets 2.14.6
+ - Tokenizers 0.15.0
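The derived quantities in the hyperparameter list are internally consistent; a short sanity check (assuming gradient_accumulation_steps = 1, which the card does not list):

```python
# Per-device and derived batch/step counts from the model card above.
train_batch_size = 64   # per device
num_devices = 8
num_epochs = 5
steps_per_epoch = 273   # from the training-results table

total_train_batch_size = train_batch_size * num_devices  # should be 512
total_steps = steps_per_epoch * num_epochs               # should be 1365, the final step
```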
all_results.json ADDED
@@ -0,0 +1,13 @@
+ {
+     "epoch": 5.0,
+     "eval_loss": 2.118281126022339,
+     "eval_runtime": 30.3009,
+     "eval_samples": 23110,
+     "eval_samples_per_second": 509.26,
+     "eval_steps_per_second": 2.013,
+     "train_loss": 3.477488596011431,
+     "train_runtime": 5141.5129,
+     "train_samples": 207865,
+     "train_samples_per_second": 135.588,
+     "train_steps_per_second": 0.265
+ }
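The eval throughput figures imply the number of examples actually fed to evaluation: eval_samples_per_second × eval_runtime ≈ 15431, which matches the `Num examples = 15431` line in the run's output.log, while `eval_samples` (23110) is presumably the pre-packing example count. A quick check using the figures above:

```python
# Figures copied from all_results.json above.
eval_runtime = 30.3009
eval_samples_per_second = 509.26

# Examples actually evaluated, implied by the throughput figures.
examples_evaluated = round(eval_samples_per_second * eval_runtime)
```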
eval_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 5.0,
+     "eval_loss": 2.118281126022339,
+     "eval_runtime": 30.3009,
+     "eval_samples": 23110,
+     "eval_samples_per_second": 509.26,
+     "eval_steps_per_second": 2.013
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "transformers_version": "4.36.2"
+ }
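The token ids here follow the Mistral/Llama tokenizer convention (`<s>` = 1, `</s>` = 2). The file is plain JSON and can be inspected without loading the model; a minimal sketch with the contents inlined:

```python
import json

# generation_config.json contents, inlined for illustration.
config_text = """
{
  "_from_model_config": true,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "transformers_version": "4.36.2"
}
"""
config = json.loads(config_text)
```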
runs/Feb01_17-58-13_ip-26-0-165-24/events.out.tfevents.1706815561.ip-26-0-165-24.239318.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4132739882d1f8d3f4b8d46f83c2519fdce3f5de5dab0de02ecd3939cc3721a5
+ size 359
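This file is stored as a Git LFS pointer rather than as its contents: the pointer records only the spec version, the SHA-256 object id, and the byte size of the real file. A sketch of a parser for this key/value format (the helper name is illustrative, not from the repo):

```python
def parse_lfs_pointer(text: str) -> dict:
    """Parse a git-lfs pointer file into its key/value fields."""
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    fields["size"] = int(fields["size"])
    return fields

# The pointer shown above.
pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:4132739882d1f8d3f4b8d46f83c2519fdce3f5de5dab0de02ecd3939cc3721a5
size 359
"""
info = parse_lfs_pointer(pointer)
```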
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+     "epoch": 5.0,
+     "train_loss": 3.477488596011431,
+     "train_runtime": 5141.5129,
+     "train_samples": 207865,
+     "train_samples_per_second": 135.588,
+     "train_steps_per_second": 0.265
+ }
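The two throughput figures are mutually consistent given the global batch size of 512 (samples/s ≈ steps/s × global batch, up to rounding of the logged values):

```python
# Figures copied from train_results.json above.
train_steps_per_second = 0.265
train_samples_per_second = 135.588
total_train_batch_size = 512  # 64 per device x 8 GPUs

# Implied samples/s from steps/s; small mismatch comes from rounding steps/s to 3 decimals.
implied_samples_per_second = train_steps_per_second * total_train_batch_size
```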
trainer_state.json ADDED
@@ -0,0 +1,892 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "eval_steps": 500,
6
+ "global_step": 1365,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 2.18978102189781e-06,
14
+ "loss": 13.9701,
15
+ "step": 1
16
+ },
17
+ {
18
+ "epoch": 0.04,
19
+ "learning_rate": 2.1897810218978098e-05,
20
+ "loss": 9.8829,
21
+ "step": 10
22
+ },
23
+ {
24
+ "epoch": 0.07,
25
+ "learning_rate": 4.3795620437956196e-05,
26
+ "loss": 7.6246,
27
+ "step": 20
28
+ },
29
+ {
30
+ "epoch": 0.11,
31
+ "learning_rate": 6.56934306569343e-05,
32
+ "loss": 7.2381,
33
+ "step": 30
34
+ },
35
+ {
36
+ "epoch": 0.15,
37
+ "learning_rate": 8.759124087591239e-05,
38
+ "loss": 7.2,
39
+ "step": 40
40
+ },
41
+ {
42
+ "epoch": 0.18,
43
+ "learning_rate": 0.00010948905109489051,
44
+ "loss": 7.1787,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 0.22,
49
+ "learning_rate": 0.0001313868613138686,
50
+ "loss": 7.1182,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.26,
55
+ "learning_rate": 0.00015328467153284672,
56
+ "loss": 7.1756,
57
+ "step": 70
58
+ },
59
+ {
60
+ "epoch": 0.29,
61
+ "learning_rate": 0.00017518248175182478,
62
+ "loss": 6.9204,
63
+ "step": 80
64
+ },
65
+ {
66
+ "epoch": 0.33,
67
+ "learning_rate": 0.0001970802919708029,
68
+ "loss": 6.739,
69
+ "step": 90
70
+ },
71
+ {
72
+ "epoch": 0.37,
73
+ "learning_rate": 0.00021897810218978101,
74
+ "loss": 6.6124,
75
+ "step": 100
76
+ },
77
+ {
78
+ "epoch": 0.4,
79
+ "learning_rate": 0.0002408759124087591,
80
+ "loss": 6.5668,
81
+ "step": 110
82
+ },
83
+ {
84
+ "epoch": 0.44,
85
+ "learning_rate": 0.0002627737226277372,
86
+ "loss": 6.4359,
87
+ "step": 120
88
+ },
89
+ {
90
+ "epoch": 0.48,
91
+ "learning_rate": 0.0002846715328467153,
92
+ "loss": 6.5331,
93
+ "step": 130
94
+ },
95
+ {
96
+ "epoch": 0.51,
97
+ "learning_rate": 0.00029999558221422155,
98
+ "loss": 6.4523,
99
+ "step": 140
100
+ },
101
+ {
102
+ "epoch": 0.55,
103
+ "learning_rate": 0.00029991705103933765,
104
+ "loss": 6.2601,
105
+ "step": 150
106
+ },
107
+ {
108
+ "epoch": 0.59,
109
+ "learning_rate": 0.00029974040600590614,
110
+ "loss": 6.1484,
111
+ "step": 160
112
+ },
113
+ {
114
+ "epoch": 0.62,
115
+ "learning_rate": 0.0002994657627200285,
116
+ "loss": 6.0018,
117
+ "step": 170
118
+ },
119
+ {
120
+ "epoch": 0.66,
121
+ "learning_rate": 0.0002990933009231839,
122
+ "loss": 5.8596,
123
+ "step": 180
124
+ },
125
+ {
126
+ "epoch": 0.7,
127
+ "learning_rate": 0.0002986232643745964,
128
+ "loss": 5.7249,
129
+ "step": 190
130
+ },
131
+ {
132
+ "epoch": 0.73,
133
+ "learning_rate": 0.000298055960691706,
134
+ "loss": 5.5685,
135
+ "step": 200
136
+ },
137
+ {
138
+ "epoch": 0.77,
139
+ "learning_rate": 0.0002973917611488469,
140
+ "loss": 5.4467,
141
+ "step": 210
142
+ },
143
+ {
144
+ "epoch": 0.81,
145
+ "learning_rate": 0.0002966311004342651,
146
+ "loss": 5.3091,
147
+ "step": 220
148
+ },
149
+ {
150
+ "epoch": 0.84,
151
+ "learning_rate": 0.0002957744763656356,
152
+ "loss": 5.1977,
153
+ "step": 230
154
+ },
155
+ {
156
+ "epoch": 0.88,
157
+ "learning_rate": 0.00029482244956426253,
158
+ "loss": 5.0377,
159
+ "step": 240
160
+ },
161
+ {
162
+ "epoch": 0.92,
163
+ "learning_rate": 0.0002937756430881789,
164
+ "loss": 4.9283,
165
+ "step": 250
166
+ },
167
+ {
168
+ "epoch": 0.95,
169
+ "learning_rate": 0.0002926347420243833,
170
+ "loss": 5.091,
171
+ "step": 260
172
+ },
173
+ {
174
+ "epoch": 0.99,
175
+ "learning_rate": 0.0002914004930404816,
176
+ "loss": 4.8342,
177
+ "step": 270
178
+ },
179
+ {
180
+ "epoch": 1.0,
181
+ "eval_loss": 4.737916946411133,
182
+ "eval_runtime": 30.7851,
183
+ "eval_samples_per_second": 501.248,
184
+ "eval_steps_per_second": 1.981,
185
+ "step": 273
186
+ },
187
+ {
188
+ "epoch": 1.03,
189
+ "learning_rate": 0.00029007370389602736,
190
+ "loss": 4.7528,
191
+ "step": 280
192
+ },
193
+ {
194
+ "epoch": 1.06,
195
+ "learning_rate": 0.00028865524291388006,
196
+ "loss": 4.6222,
197
+ "step": 290
198
+ },
199
+ {
200
+ "epoch": 1.1,
201
+ "learning_rate": 0.0002871460384119274,
202
+ "loss": 4.4689,
203
+ "step": 300
204
+ },
205
+ {
206
+ "epoch": 1.14,
207
+ "learning_rate": 0.00028554707809554385,
208
+ "loss": 4.3523,
209
+ "step": 310
210
+ },
211
+ {
212
+ "epoch": 1.17,
213
+ "learning_rate": 0.0002838594084111824,
214
+ "loss": 4.3172,
215
+ "step": 320
216
+ },
217
+ {
218
+ "epoch": 1.21,
219
+ "learning_rate": 0.00028208413386152326,
220
+ "loss": 4.1787,
221
+ "step": 330
222
+ },
223
+ {
224
+ "epoch": 1.25,
225
+ "learning_rate": 0.00028022241628262735,
226
+ "loss": 4.1082,
227
+ "step": 340
228
+ },
229
+ {
230
+ "epoch": 1.28,
231
+ "learning_rate": 0.00027827547408356773,
232
+ "loss": 3.9914,
233
+ "step": 350
234
+ },
235
+ {
236
+ "epoch": 1.32,
237
+ "learning_rate": 0.00027624458144903663,
238
+ "loss": 4.0363,
239
+ "step": 360
240
+ },
241
+ {
242
+ "epoch": 1.36,
243
+ "learning_rate": 0.0002741310675054493,
244
+ "loss": 5.2644,
245
+ "step": 370
246
+ },
247
+ {
248
+ "epoch": 1.39,
249
+ "learning_rate": 0.0002719363154510924,
250
+ "loss": 4.8735,
251
+ "step": 380
252
+ },
253
+ {
254
+ "epoch": 1.43,
255
+ "learning_rate": 0.000269661761650883,
256
+ "loss": 4.4014,
257
+ "step": 390
258
+ },
259
+ {
260
+ "epoch": 1.47,
261
+ "learning_rate": 0.00026730889469633406,
262
+ "loss": 4.1602,
263
+ "step": 400
264
+ },
265
+ {
266
+ "epoch": 1.5,
267
+ "learning_rate": 0.0002648792544313389,
268
+ "loss": 4.0027,
269
+ "step": 410
270
+ },
271
+ {
272
+ "epoch": 1.54,
273
+ "learning_rate": 0.0002623744309444141,
274
+ "loss": 3.9095,
275
+ "step": 420
276
+ },
277
+ {
278
+ "epoch": 1.58,
279
+ "learning_rate": 0.0002597960635280588,
280
+ "loss": 3.8225,
281
+ "step": 430
282
+ },
283
+ {
284
+ "epoch": 1.61,
285
+ "learning_rate": 0.00025714583960591324,
286
+ "loss": 3.7638,
287
+ "step": 440
288
+ },
289
+ {
290
+ "epoch": 1.65,
291
+ "learning_rate": 0.0002544254936284164,
292
+ "loss": 3.7038,
293
+ "step": 450
294
+ },
295
+ {
296
+ "epoch": 1.68,
297
+ "learning_rate": 0.0002516368059376883,
298
+ "loss": 3.6738,
299
+ "step": 460
300
+ },
301
+ {
302
+ "epoch": 1.72,
303
+ "learning_rate": 0.00024878160160237653,
304
+ "loss": 3.6142,
305
+ "step": 470
306
+ },
307
+ {
308
+ "epoch": 1.76,
309
+ "learning_rate": 0.00024586174922323293,
310
+ "loss": 3.5565,
311
+ "step": 480
312
+ },
313
+ {
314
+ "epoch": 1.79,
315
+ "learning_rate": 0.0002428791597101996,
316
+ "loss": 3.5071,
317
+ "step": 490
318
+ },
319
+ {
320
+ "epoch": 1.83,
321
+ "learning_rate": 0.00023983578503180541,
322
+ "loss": 3.4761,
323
+ "step": 500
324
+ },
325
+ {
326
+ "epoch": 1.87,
327
+ "learning_rate": 0.00023673361693769216,
328
+ "loss": 3.4575,
329
+ "step": 510
330
+ },
331
+ {
332
+ "epoch": 1.9,
333
+ "learning_rate": 0.00023357468565510535,
334
+ "loss": 3.4062,
335
+ "step": 520
336
+ },
337
+ {
338
+ "epoch": 1.94,
339
+ "learning_rate": 0.00023036105856020315,
340
+ "loss": 3.3653,
341
+ "step": 530
342
+ },
343
+ {
344
+ "epoch": 1.98,
345
+ "learning_rate": 0.00022709483882505315,
346
+ "loss": 3.3301,
347
+ "step": 540
348
+ },
349
+ {
350
+ "epoch": 2.0,
351
+ "eval_loss": 3.2846388816833496,
352
+ "eval_runtime": 30.5512,
353
+ "eval_samples_per_second": 505.086,
354
+ "eval_steps_per_second": 1.997,
355
+ "step": 546
356
+ },
357
+ {
358
+ "epoch": 2.01,
359
+ "learning_rate": 0.00022377816404120263,
360
+ "loss": 3.2758,
361
+ "step": 550
362
+ },
363
+ {
364
+ "epoch": 2.05,
365
+ "learning_rate": 0.00022041320482072218,
366
+ "loss": 3.2522,
367
+ "step": 560
368
+ },
369
+ {
370
+ "epoch": 2.09,
371
+ "learning_rate": 0.00021700216337563975,
372
+ "loss": 3.1993,
373
+ "step": 570
374
+ },
375
+ {
376
+ "epoch": 2.12,
377
+ "learning_rate": 0.00021354727207669315,
378
+ "loss": 3.147,
379
+ "step": 580
380
+ },
381
+ {
382
+ "epoch": 2.16,
383
+ "learning_rate": 0.00021005079199234558,
384
+ "loss": 3.1192,
385
+ "step": 590
386
+ },
387
+ {
388
+ "epoch": 2.2,
389
+ "learning_rate": 0.00020651501140901961,
390
+ "loss": 3.0901,
391
+ "step": 600
392
+ },
393
+ {
394
+ "epoch": 2.23,
395
+ "learning_rate": 0.0002029422443335184,
396
+ "loss": 3.0812,
397
+ "step": 610
398
+ },
399
+ {
400
+ "epoch": 2.27,
401
+ "learning_rate": 0.00019933482897861385,
402
+ "loss": 3.0369,
403
+ "step": 620
404
+ },
405
+ {
406
+ "epoch": 2.31,
407
+ "learning_rate": 0.00019569512623279333,
408
+ "loss": 2.9916,
409
+ "step": 630
410
+ },
411
+ {
412
+ "epoch": 2.34,
413
+ "learning_rate": 0.00019202551811516592,
414
+ "loss": 2.9367,
415
+ "step": 640
416
+ },
417
+ {
418
+ "epoch": 2.38,
419
+ "learning_rate": 0.00018832840621653993,
420
+ "loss": 2.9235,
421
+ "step": 650
422
+ },
423
+ {
424
+ "epoch": 2.42,
425
+ "learning_rate": 0.00018460621012769126,
426
+ "loss": 3.0402,
427
+ "step": 660
428
+ },
429
+ {
430
+ "epoch": 2.45,
431
+ "learning_rate": 0.0001808613658558521,
432
+ "loss": 2.9328,
433
+ "step": 670
434
+ },
435
+ {
436
+ "epoch": 2.49,
437
+ "learning_rate": 0.00017709632423045527,
438
+ "loss": 2.8384,
439
+ "step": 680
440
+ },
441
+ {
442
+ "epoch": 2.53,
443
+ "learning_rate": 0.0001733135492991784,
444
+ "loss": 2.7372,
445
+ "step": 690
446
+ },
447
+ {
448
+ "epoch": 2.56,
449
+ "learning_rate": 0.00016951551671533753,
450
+ "loss": 2.7189,
451
+ "step": 700
452
+ },
453
+ {
454
+ "epoch": 2.6,
455
+ "learning_rate": 0.00016570471211768486,
456
+ "loss": 2.6697,
457
+ "step": 710
458
+ },
459
+ {
460
+ "epoch": 2.64,
461
+ "learning_rate": 0.00016188362950367204,
462
+ "loss": 2.6319,
463
+ "step": 720
464
+ },
465
+ {
466
+ "epoch": 2.67,
467
+ "learning_rate": 0.00015805476959724273,
468
+ "loss": 2.5963,
469
+ "step": 730
470
+ },
471
+ {
472
+ "epoch": 2.71,
473
+ "learning_rate": 0.00015422063821222292,
474
+ "loss": 2.5732,
475
+ "step": 740
476
+ },
477
+ {
478
+ "epoch": 2.75,
479
+ "learning_rate": 0.00015038374461238062,
480
+ "loss": 2.5426,
481
+ "step": 750
482
+ },
483
+ {
484
+ "epoch": 2.78,
485
+ "learning_rate": 0.00014654659986922697,
486
+ "loss": 2.5217,
487
+ "step": 760
488
+ },
489
+ {
490
+ "epoch": 2.82,
491
+ "learning_rate": 0.00014271171521863514,
492
+ "loss": 2.4971,
493
+ "step": 770
494
+ },
495
+ {
496
+ "epoch": 2.86,
497
+ "learning_rate": 0.00013888160041735086,
498
+ "loss": 2.4917,
499
+ "step": 780
500
+ },
501
+ {
502
+ "epoch": 2.89,
503
+ "learning_rate": 0.0001350587621004716,
504
+ "loss": 2.4795,
505
+ "step": 790
506
+ },
507
+ {
508
+ "epoch": 2.93,
509
+ "learning_rate": 0.00013124570214096816,
510
+ "loss": 2.4464,
511
+ "step": 800
512
+ },
513
+ {
514
+ "epoch": 2.97,
515
+ "learning_rate": 0.00012744491601232355,
516
+ "loss": 2.4158,
517
+ "step": 810
518
+ },
519
+ {
520
+ "epoch": 3.0,
521
+ "eval_loss": 2.413381576538086,
522
+ "eval_runtime": 30.6479,
523
+ "eval_samples_per_second": 503.492,
524
+ "eval_steps_per_second": 1.99,
525
+ "step": 819
526
+ },
527
+ {
528
+ "epoch": 3.0,
529
+ "learning_rate": 0.00012365889115535916,
530
+ "loss": 2.402,
531
+ "step": 820
532
+ },
533
+ {
534
+ "epoch": 3.04,
535
+ "learning_rate": 0.00011989010535031889,
536
+ "loss": 2.3491,
537
+ "step": 830
538
+ },
539
+ {
540
+ "epoch": 3.08,
541
+ "learning_rate": 0.00011614102509527481,
542
+ "loss": 2.3247,
543
+ "step": 840
544
+ },
545
+ {
546
+ "epoch": 3.11,
547
+ "learning_rate": 0.00011241410399191728,
548
+ "loss": 2.3179,
549
+ "step": 850
550
+ },
551
+ {
552
+ "epoch": 3.15,
553
+ "learning_rate": 0.00010871178113978432,
554
+ "loss": 2.3006,
555
+ "step": 860
556
+ },
557
+ {
558
+ "epoch": 3.19,
559
+ "learning_rate": 0.00010503647953998295,
560
+ "loss": 2.305,
561
+ "step": 870
562
+ },
563
+ {
564
+ "epoch": 3.22,
565
+ "learning_rate": 0.00010139060450944528,
566
+ "loss": 2.2922,
567
+ "step": 880
568
+ },
569
+ {
570
+ "epoch": 3.26,
571
+ "learning_rate": 9.777654210675867e-05,
572
+ "loss": 2.2766,
573
+ "step": 890
574
+ },
575
+ {
576
+ "epoch": 3.3,
577
+ "learning_rate": 9.419665757059952e-05,
578
+ "loss": 2.2732,
579
+ "step": 900
580
+ },
581
+ {
582
+ "epoch": 3.33,
583
+ "learning_rate": 9.065329377179248e-05,
584
+ "loss": 2.2591,
585
+ "step": 910
586
+ },
587
+ {
588
+ "epoch": 3.37,
589
+ "learning_rate": 8.714876968000853e-05,
590
+ "loss": 2.2477,
591
+ "step": 920
592
+ },
593
+ {
594
+ "epoch": 3.41,
595
+ "learning_rate": 8.368537884610555e-05,
596
+ "loss": 2.243,
597
+ "step": 930
598
+ },
599
+ {
600
+ "epoch": 3.44,
601
+ "learning_rate": 8.026538790110405e-05,
602
+ "loss": 2.2341,
603
+ "step": 940
604
+ },
605
+ {
606
+ "epoch": 3.48,
607
+ "learning_rate": 7.689103507278047e-05,
608
+ "loss": 2.2249,
609
+ "step": 950
610
+ },
611
+ {
612
+ "epoch": 3.52,
613
+ "learning_rate": 7.356452872084971e-05,
614
+ "loss": 2.236,
615
+ "step": 960
616
+ },
617
+ {
618
+ "epoch": 3.55,
619
+ "learning_rate": 7.028804589169443e-05,
620
+ "loss": 2.2097,
621
+ "step": 970
622
+ },
623
+ {
624
+ "epoch": 3.59,
625
+ "learning_rate": 6.706373089358791e-05,
626
+ "loss": 2.1968,
627
+ "step": 980
628
+ },
629
+ {
630
+ "epoch": 3.63,
631
+ "learning_rate": 6.389369389334193e-05,
632
+ "loss": 2.187,
633
+ "step": 990
634
+ },
635
+ {
636
+ "epoch": 3.66,
637
+ "learning_rate": 6.0780009535299393e-05,
638
+ "loss": 2.1865,
639
+ "step": 1000
640
+ },
641
+ {
642
+ "epoch": 3.7,
643
+ "learning_rate": 5.772471558357407e-05,
644
+ "loss": 2.1732,
645
+ "step": 1010
646
+ },
647
+ {
648
+ "epoch": 3.74,
649
+ "learning_rate": 5.4729811588427536e-05,
650
+ "loss": 2.1648,
651
+ "step": 1020
652
+ },
653
+ {
654
+ "epoch": 3.77,
655
+ "learning_rate": 5.179725757765449e-05,
656
+ "loss": 2.1696,
657
+ "step": 1030
658
+ },
659
+ {
660
+ "epoch": 3.81,
661
+ "learning_rate": 4.892897277383434e-05,
662
+ "loss": 2.1591,
663
+ "step": 1040
664
+ },
665
+ {
666
+ "epoch": 3.85,
667
+ "learning_rate": 4.6126834338287713e-05,
668
+ "loss": 2.1536,
669
+ "step": 1050
670
+ },
671
+ {
672
+ "epoch": 3.88,
673
+ "learning_rate": 4.339267614256027e-05,
674
+ "loss": 2.1536,
675
+ "step": 1060
676
+ },
677
+ {
678
+ "epoch": 3.92,
679
+ "learning_rate": 4.07282875682373e-05,
680
+ "loss": 2.1404,
681
+ "step": 1070
682
+ },
683
+ {
684
+ "epoch": 3.96,
685
+ "learning_rate": 3.813541233587552e-05,
686
+ "loss": 2.1403,
687
+ "step": 1080
688
+ },
689
+ {
690
+ "epoch": 3.99,
691
+ "learning_rate": 3.561574736381752e-05,
692
+ "loss": 2.1322,
693
+ "step": 1090
694
+ },
695
+ {
696
+ "epoch": 4.0,
697
+ "eval_loss": 2.1637322902679443,
698
+ "eval_runtime": 30.5067,
699
+ "eval_samples_per_second": 505.824,
700
+ "eval_steps_per_second": 2.0,
701
+ "step": 1092
702
+ },
703
+ {
704
+ "epoch": 4.03,
705
+ "learning_rate": 3.317094165763639e-05,
706
+ "loss": 2.0822,
707
+ "step": 1100
708
+ },
709
+ {
710
+ "epoch": 4.07,
711
+ "learning_rate": 3.080259523093675e-05,
712
+ "loss": 2.0771,
713
+ "step": 1110
714
+ },
715
+ {
716
+ "epoch": 4.1,
717
+ "learning_rate": 2.8512258058219112e-05,
718
+ "loss": 2.0782,
719
+ "step": 1120
720
+ },
721
+ {
722
+ "epoch": 4.14,
723
+ "learning_rate": 2.6301429060492306e-05,
724
+ "loss": 2.0688,
725
+ "step": 1130
726
+ },
727
+ {
728
+ "epoch": 4.18,
729
+ "learning_rate": 2.417155512429832e-05,
730
+ "loss": 2.0603,
731
+ "step": 1140
732
+ },
733
+ {
734
+ "epoch": 4.21,
735
+ "learning_rate": 2.2124030154791035e-05,
736
+ "loss": 2.0602,
737
+ "step": 1150
738
+ },
739
+ {
740
+ "epoch": 4.25,
741
+ "learning_rate": 2.0160194163489062e-05,
742
+ "loss": 2.0603,
743
+ "step": 1160
744
+ },
745
+ {
746
+ "epoch": 4.29,
747
+ "learning_rate": 1.828133239129944e-05,
748
+ "loss": 2.0617,
749
+ "step": 1170
750
+ },
751
+ {
752
+ "epoch": 4.32,
753
+ "learning_rate": 1.6488674467386278e-05,
754
+ "loss": 2.065,
755
+ "step": 1180
756
+ },
757
+ {
758
+ "epoch": 4.36,
759
+ "learning_rate": 1.47833936044345e-05,
760
+ "loss": 2.0479,
761
+ "step": 1190
762
+ },
763
+ {
764
+ "epoch": 4.4,
765
+ "learning_rate": 1.3166605830835903e-05,
766
+ "loss": 2.0553,
767
+ "step": 1200
768
+ },
769
+ {
770
+ "epoch": 4.43,
771
+ "learning_rate": 1.1639369260299463e-05,
772
+ "loss": 2.044,
773
+ "step": 1210
774
+ },
775
+ {
776
+ "epoch": 4.47,
777
+ "learning_rate": 1.0202683399364469e-05,
778
+ "loss": 2.0539,
779
+ "step": 1220
780
+ },
781
+ {
782
+ "epoch": 4.51,
783
+ "learning_rate": 8.857488493268839e-06,
784
+ "loss": 2.0471,
785
+ "step": 1230
786
+ },
787
+ {
788
+ "epoch": 4.54,
789
+ "learning_rate": 7.604664910601915e-06,
790
+ "loss": 2.0548,
791
+ "step": 1240
792
+ },
793
+ {
794
+ "epoch": 4.58,
795
+ "learning_rate": 6.445032567143238e-06,
796
+ "loss": 2.0447,
797
+ "step": 1250
798
+ },
799
+ {
800
+ "epoch": 4.62,
801
+ "learning_rate": 5.379350389265319e-06,
802
+ "loss": 2.0379,
803
+ "step": 1260
804
+ },
805
+ {
806
+ "epoch": 4.65,
807
+ "learning_rate": 4.408315817250818e-06,
808
+ "loss": 2.0351,
809
+ "step": 1270
810
+ },
811
+ {
812
+ "epoch": 4.69,
813
+ "learning_rate": 3.5325643488498757e-06,
814
+ "loss": 2.0463,
815
+ "step": 1280
816
+ },
817
+ {
818
+ "epoch": 4.73,
819
+ "learning_rate": 2.7526691233758334e-06,
820
+ "loss": 2.0436,
821
+ "step": 1290
822
+ },
823
+ {
824
+ "epoch": 4.76,
825
+ "learning_rate": 2.0691405466118307e-06,
826
+ "loss": 2.0491,
827
+ "step": 1300
828
+ },
829
+ {
830
+ "epoch": 4.8,
831
+ "learning_rate": 1.4824259567733698e-06,
832
+ "loss": 2.0461,
833
+ "step": 1310
834
+ },
835
+ {
836
+ "epoch": 4.84,
837
+ "learning_rate": 9.929093317461057e-07,
838
+ "loss": 2.041,
839
+ "step": 1320
840
+ },
841
+ {
842
+ "epoch": 4.87,
843
+ "learning_rate": 6.009110377897086e-07,
844
+ "loss": 2.04,
845
+ "step": 1330
846
+ },
847
+ {
848
+ "epoch": 4.91,
849
+ "learning_rate": 3.066876198728474e-07,
850
+ "loss": 2.0415,
851
+ "step": 1340
852
+ },
853
+ {
854
+ "epoch": 4.95,
855
+ "learning_rate": 1.1043163377627562e-07,
856
+ "loss": 2.0418,
857
+ "step": 1350
858
+ },
859
+ {
860
+ "epoch": 4.98,
861
+ "learning_rate": 1.2271520073786623e-08,
862
+ "loss": 2.0369,
863
+ "step": 1360
864
+ },
865
+ {
866
+ "epoch": 5.0,
867
+ "eval_loss": 2.118281126022339,
868
+ "eval_runtime": 30.6172,
869
+ "eval_samples_per_second": 503.999,
870
+ "eval_steps_per_second": 1.992,
871
+ "step": 1365
872
+ },
873
+ {
874
+ "epoch": 5.0,
875
+ "step": 1365,
876
+ "total_flos": 457285168005120.0,
877
+ "train_loss": 3.477488596011431,
878
+ "train_runtime": 5141.5129,
879
+ "train_samples_per_second": 135.588,
880
+ "train_steps_per_second": 0.265
881
+ }
882
+ ],
883
+ "logging_steps": 10,
884
+ "max_steps": 1365,
885
+ "num_input_tokens_seen": 0,
886
+ "num_train_epochs": 5,
887
+ "save_steps": 500,
888
+ "total_flos": 457285168005120.0,
889
+ "train_batch_size": 64,
890
+ "trial_name": null,
891
+ "trial_params": null
892
+ }
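The `log_history` array above mixes training-loss entries (every 10 steps) with per-epoch evaluation entries; the latter are the ones carrying an `eval_loss` key. A small sketch of pulling the validation curve out of such a file (the helper name is illustrative):

```python
def eval_curve(trainer_state: dict) -> list:
    """Pull (epoch, eval_loss) pairs out of a Trainer log_history."""
    return [(e["epoch"], e["eval_loss"])
            for e in trainer_state["log_history"] if "eval_loss" in e]

# Minimal example with the same shape as trainer_state.json above.
state = {"log_history": [
    {"epoch": 0.99, "learning_rate": 2.9e-4, "loss": 4.83, "step": 270},
    {"epoch": 1.0, "eval_loss": 4.737916946411133, "step": 273},
    {"epoch": 5.0, "eval_loss": 2.118281126022339, "step": 1365},
]}
curve = eval_curve(state)
```

In practice the real file would be read with `json.load` before calling the helper.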
wandb/debug-internal.log CHANGED
@@ -4459,3 +4459,45 @@
4459
  2024-02-01 19:25:27,251 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: internal_messages
4460
  2024-02-01 19:25:27,252 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: stop_status
4461
  2024-02-01 19:25:27,253 DEBUG SenderThread:239784 [sender.py:send_request():409] send_request: stop_status
4462
+ 2024-02-01 19:25:32,118 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
4463
+ 2024-02-01 19:25:33,575 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4464
+ 2024-02-01 19:25:35,577 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4465
+ 2024-02-01 19:25:37,559 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
4466
+ 2024-02-01 19:25:37,580 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4467
+ 2024-02-01 19:25:39,583 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4468
+ 2024-02-01 19:25:41,585 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4469
+ 2024-02-01 19:25:42,251 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: internal_messages
4470
+ 2024-02-01 19:25:42,252 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: stop_status
4471
+ 2024-02-01 19:25:42,253 DEBUG SenderThread:239784 [sender.py:send_request():409] send_request: stop_status
4472
+ 2024-02-01 19:25:43,038 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
4473
+ 2024-02-01 19:25:43,588 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4474
+ 2024-02-01 19:25:45,590 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4475
+ 2024-02-01 19:25:47,593 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4476
+ 2024-02-01 19:25:48,502 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
4477
+ 2024-02-01 19:25:49,595 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4478
+ 2024-02-01 19:25:51,005 DEBUG SenderThread:239784 [sender.py:send():382] send: stats
4479
+ 2024-02-01 19:25:51,598 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4480
+ 2024-02-01 19:25:53,600 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4481
+ 2024-02-01 19:25:53,968 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
4482
+ 2024-02-01 19:25:55,603 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4483
+ 2024-02-01 19:25:57,252 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: internal_messages
4484
+ 2024-02-01 19:25:57,253 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: stop_status
4485
+ 2024-02-01 19:25:57,253 DEBUG SenderThread:239784 [sender.py:send_request():409] send_request: stop_status
4486
+ 2024-02-01 19:25:57,605 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4487
+ 2024-02-01 19:25:59,433 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
4488
+ 2024-02-01 19:25:59,608 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4489
+ 2024-02-01 19:26:01,571 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: partial_history
4490
+ 2024-02-01 19:26:01,573 DEBUG SenderThread:239784 [sender.py:send():382] send: history
4491
+ 2024-02-01 19:26:01,573 DEBUG SenderThread:239784 [sender.py:send_request():409] send_request: summary_record
4492
+ 2024-02-01 19:26:01,575 INFO SenderThread:239784 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
4493
+ 2024-02-01 19:26:01,611 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4494
+ 2024-02-01 19:26:01,611 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/wandb-summary.json
4495
+ 2024-02-01 19:26:03,614 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4496
+ 2024-02-01 19:26:04,694 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
4497
+ 2024-02-01 19:26:05,616 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4498
+ 2024-02-01 19:26:09,621 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
4499
+ 2024-02-01 19:26:10,395 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
4500
+ 2024-02-01 19:26:12,252 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: internal_messages
4501
+ 2024-02-01 19:26:12,253 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: stop_status
4502
+ 2024-02-01 19:26:12,253 DEBUG SenderThread:239784 [sender.py:send_request():409] send_request: stop_status
4503
+ 2024-02-01 19:26:13,626 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
wandb/run-20240201_175850-i93q0p12/files/output.log CHANGED
@@ -1682,3 +1682,52 @@
  Training completed. Do not forget to share your model on huggingface.co/models =)
  100%|█████████████████████████████████████████████████████████████████████████████| 1365/1365 [1:25:34<00:00,  3.76s/it]
  [INFO|trainer.py:3614] 2024-02-01 19:24:29,900 >> Waiting for the current checkpoint push to be finished, this might take a couple of minutes.
+ {'train_runtime': 5141.5129, 'train_samples_per_second': 135.588, 'train_steps_per_second': 0.265, 'train_loss': 3.477488596011431, 'epoch': 5.0}
+ [INFO|trainer.py:3166] 2024-02-01 19:25:31,190 >> ***** Running Evaluation *****
+ [INFO|trainer.py:3168] 2024-02-01 19:25:31,190 >>   Num examples = 15431
+ [INFO|trainer.py:3171] 2024-02-01 19:25:31,190 >>   Batch size = 32
+ 3%|██▊ | 2/61 [00:00<00:14, 4.04it/s]
+ ***** train metrics *****
+   epoch                    = 5.0
+   train_loss               = 3.4775
+   train_runtime            = 1:25:41.51
+   train_samples            = 207865
+   train_samples_per_second = 135.588
+   train_steps_per_second   = 0.265
+ 100%|█████████████████████████████████████████████████████████████████████████████████| 61/61 [00:29<00:00,  2.04it/s]
+ ***** eval metrics *****
+   epoch                   = 5.0
+   eval_loss               = 2.1183
+   eval_runtime            = 0:00:30.30
+   eval_samples            = 23110
+   eval_samples_per_second = 509.26
+   eval_steps_per_second   = 2.013
+ 2024-02-01 19:26:01 - INFO - __main__ - *** Save model ***
+ [INFO|trainer.py:2889] 2024-02-01 19:26:02,688 >> Saving model checkpoint to ./
+ [INFO|configuration_utils.py:483] 2024-02-01 19:26:02,691 >> Configuration saved in ./config.json
+ [INFO|configuration_utils.py:594] 2024-02-01 19:26:02,693 >> Configuration saved in ./generation_config.json
+ [INFO|modeling_utils.py:2382] 2024-02-01 19:26:06,302 >> Model weights saved in ./pytorch_model.bin
+ [INFO|tokenization_utils_base.py:2432] 2024-02-01 19:26:06,305 >> tokenizer config file saved in ./tokenizer_config.json
+ [INFO|tokenization_utils_base.py:2441] 2024-02-01 19:26:06,307 >> Special tokens file saved in ./special_tokens_map.json
+ [INFO|trainer.py:2889] 2024-02-01 19:26:07,389 >> Saving model checkpoint to ./
+ [INFO|configuration_utils.py:483] 2024-02-01 19:26:07,392 >> Configuration saved in ./config.json
+ [INFO|configuration_utils.py:594] 2024-02-01 19:26:07,394 >> Configuration saved in ./generation_config.json
+ [INFO|modeling_utils.py:2382] 2024-02-01 19:26:11,028 >> Model weights saved in ./pytorch_model.bin
+ [INFO|tokenization_utils_base.py:2432] 2024-02-01 19:26:11,031 >> tokenizer config file saved in ./tokenizer_config.json
+ [INFO|tokenization_utils_base.py:2441] 2024-02-01 19:26:11,033 >> Special tokens file saved in ./special_tokens_map.json
+ [INFO|modelcard.py:452] 2024-02-01 19:26:11,224 >> Dropping the following result as it does not have all the necessary fields:
wandb/run-20240201_175850-i93q0p12/files/wandb-summary.json CHANGED
@@ -1 +1 @@
- {"train/loss": 2.0369, "train/learning_rate": 1.2271520073786623e-08, "train/epoch": 5.0, "train/global_step": 1365, "_timestamp": 1706815469.711052, "_runtime": 5139.627084970474, "_step": 142, "eval/loss": 2.118281126022339, "eval/runtime": 30.6172, "eval/samples_per_second": 503.999, "eval/steps_per_second": 1.992, "train/train_runtime": 5141.5129, "train/train_samples_per_second": 135.588, "train/train_steps_per_second": 0.265, "train/total_flos": 457285168005120.0, "train/train_loss": 3.477488596011431}
+ {"train/loss": 2.0369, "train/learning_rate": 1.2271520073786623e-08, "train/epoch": 5.0, "train/global_step": 1365, "_timestamp": 1706815561.5711305, "_runtime": 5231.487163543701, "_step": 143, "eval/loss": 2.118281126022339, "eval/runtime": 30.3009, "eval/samples_per_second": 509.26, "eval/steps_per_second": 2.013, "train/train_runtime": 5141.5129, "train/train_samples_per_second": 135.588, "train/train_steps_per_second": 0.265, "train/total_flos": 457285168005120.0, "train/train_loss": 3.477488596011431}
wandb/run-20240201_175850-i93q0p12/logs/debug-internal.log CHANGED
@@ -4459,3 +4459,45 @@
  2024-02-01 19:25:27,251 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: internal_messages
  2024-02-01 19:25:27,252 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: stop_status
  2024-02-01 19:25:27,253 DEBUG SenderThread:239784 [sender.py:send_request():409] send_request: stop_status
+ 2024-02-01 19:25:32,118 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
+ 2024-02-01 19:25:33,575 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:25:35,577 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:25:37,559 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
+ 2024-02-01 19:25:37,580 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:25:39,583 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:25:41,585 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:25:42,251 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: internal_messages
+ 2024-02-01 19:25:42,252 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: stop_status
+ 2024-02-01 19:25:42,253 DEBUG SenderThread:239784 [sender.py:send_request():409] send_request: stop_status
+ 2024-02-01 19:25:43,038 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
+ 2024-02-01 19:25:43,588 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:25:45,590 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:25:47,593 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:25:48,502 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
+ 2024-02-01 19:25:49,595 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:25:51,005 DEBUG SenderThread:239784 [sender.py:send():382] send: stats
+ 2024-02-01 19:25:51,598 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:25:53,600 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:25:53,968 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
+ 2024-02-01 19:25:55,603 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:25:57,252 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: internal_messages
+ 2024-02-01 19:25:57,253 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: stop_status
+ 2024-02-01 19:25:57,253 DEBUG SenderThread:239784 [sender.py:send_request():409] send_request: stop_status
+ 2024-02-01 19:25:57,605 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:25:59,433 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
+ 2024-02-01 19:25:59,608 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:26:01,571 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: partial_history
+ 2024-02-01 19:26:01,573 DEBUG SenderThread:239784 [sender.py:send():382] send: history
+ 2024-02-01 19:26:01,573 DEBUG SenderThread:239784 [sender.py:send_request():409] send_request: summary_record
+ 2024-02-01 19:26:01,575 INFO SenderThread:239784 [sender.py:_save_file():1392] saving file wandb-summary.json with policy end
+ 2024-02-01 19:26:01,611 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:26:01,611 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/wandb-summary.json
+ 2024-02-01 19:26:03,614 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:26:04,694 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
+ 2024-02-01 19:26:05,616 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:26:09,621 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
+ 2024-02-01 19:26:10,395 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: status_report
+ 2024-02-01 19:26:12,252 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: internal_messages
+ 2024-02-01 19:26:12,253 DEBUG HandlerThread:239784 [handler.py:handle_request():146] handle_request: stop_status
+ 2024-02-01 19:26:12,253 DEBUG SenderThread:239784 [sender.py:send_request():409] send_request: stop_status
+ 2024-02-01 19:26:13,626 INFO Thread-12 :239784 [dir_watcher.py:_on_file_modified():288] file/dir modified: /fsx/sanchit/distil-zephyr-1.5b-ssft/wandb/run-20240201_175850-i93q0p12/files/output.log
wandb/run-20240201_175850-i93q0p12/run-i93q0p12.wandb CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:83bb2becd0a41bef6a83fb4db8cc052bae33199ac7e7fd2c198b5018f4037fd9
- size 1540113
+ oid sha256:18cc7a0b99001252605aad90d7a9da2c46834f3f63607f44557cc2ada6b58562
+ size 1573084