suyeon0809 committed on May 13, 2024

Commit

f5e29c7

verified ·

1 Parent(s): dfb613f

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

logs/events.out.tfevents.1715596609.e6ce83ce3790.2044.0 +3 -0
model/model0513/README.md +202 -0
model/model0513/adapter_config.json +28 -0
model/model0513/adapter_model.safetensors +3 -0
outputs/checkpoint-1000/README.md +202 -0
outputs/checkpoint-1000/adapter_config.json +28 -0
outputs/checkpoint-1000/adapter_model.safetensors +3 -0
outputs/checkpoint-1000/optimizer.pt +3 -0
outputs/checkpoint-1000/rng_state.pth +3 -0
outputs/checkpoint-1000/scheduler.pt +3 -0
outputs/checkpoint-1000/trainer_state.json +383 -0
outputs/checkpoint-1000/training_args.bin +3 -0
outputs/checkpoint-1500/README.md +202 -0
outputs/checkpoint-1500/adapter_config.json +28 -0
outputs/checkpoint-1500/adapter_model.safetensors +3 -0
outputs/checkpoint-1500/optimizer.pt +3 -0
outputs/checkpoint-1500/rng_state.pth +3 -0
outputs/checkpoint-1500/scheduler.pt +3 -0
outputs/checkpoint-1500/trainer_state.json +558 -0
outputs/checkpoint-1500/training_args.bin +3 -0
outputs/checkpoint-2000/README.md +202 -0
outputs/checkpoint-2000/adapter_config.json +28 -0
outputs/checkpoint-2000/adapter_model.safetensors +3 -0
outputs/checkpoint-2000/optimizer.pt +3 -0
outputs/checkpoint-2000/rng_state.pth +3 -0
outputs/checkpoint-2000/scheduler.pt +3 -0
outputs/checkpoint-2000/trainer_state.json +733 -0
outputs/checkpoint-2000/training_args.bin +3 -0
outputs/checkpoint-2500/README.md +202 -0
outputs/checkpoint-2500/adapter_config.json +28 -0
outputs/checkpoint-2500/adapter_model.safetensors +3 -0
outputs/checkpoint-2500/optimizer.pt +3 -0
outputs/checkpoint-2500/rng_state.pth +3 -0
outputs/checkpoint-2500/scheduler.pt +3 -0
outputs/checkpoint-2500/trainer_state.json +908 -0
outputs/checkpoint-2500/training_args.bin +3 -0
outputs/checkpoint-3000/README.md +202 -0
outputs/checkpoint-3000/adapter_config.json +28 -0
outputs/checkpoint-3000/adapter_model.safetensors +3 -0
outputs/checkpoint-3000/optimizer.pt +3 -0
outputs/checkpoint-3000/rng_state.pth +3 -0
outputs/checkpoint-3000/scheduler.pt +3 -0
outputs/checkpoint-3000/trainer_state.json +1083 -0
outputs/checkpoint-3000/training_args.bin +3 -0
outputs/checkpoint-500/README.md +202 -0
outputs/checkpoint-500/adapter_config.json +28 -0
outputs/checkpoint-500/adapter_model.safetensors +3 -0
outputs/checkpoint-500/optimizer.pt +3 -0
outputs/checkpoint-500/rng_state.pth +3 -0
outputs/checkpoint-500/scheduler.pt +3 -0

logs/events.out.tfevents.1715596609.e6ce83ce3790.2044.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6fdc6c821af33f67499443497331aecd3db4fda29618e5a5b8a350a647a21b9e
+size 37357

model/model0513/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: beomi/KoAlpaca-Polyglot-5.8B
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.10.1.dev0

model/model0513/adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "beomi/KoAlpaca-Polyglot-5.8B",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query_key_value"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

model/model0513/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db593c2140cd43e4033c707b6e9a19433a0a2d1df3c914c8746516b23ad7af7f
+size 14688200

outputs/checkpoint-1000/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: beomi/KoAlpaca-Polyglot-5.8B
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.10.1.dev0

outputs/checkpoint-1000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "beomi/KoAlpaca-Polyglot-5.8B",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query_key_value"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

outputs/checkpoint-1000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9969dc80228b8b5d1af7cfc2012dbe5f054553fa7989fc4764c229292aab679e
+size 14688200

outputs/checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:677214d3b7e4355027b9c08cb6c24858698ae25aaf45cbf2571136fdc931c523
+size 29407610

outputs/checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:24e6982876cf3143ba3011c62f4df908d8828997018ef89961b16ed23a2c106f
+size 14244

outputs/checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6059e9c2fda3a593b36dbce4b103939bd72cee3db503aff41bf764fa9eaaa4cc
+size 1064

outputs/checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,383 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.8598452278589854,
+  "eval_steps": 500,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.017196904557179708,
+      "grad_norm": 1.7918040752410889,
+      "learning_rate": 9.933333333333334e-05,
+      "loss": 4.1644,
+      "step": 20
+    },
+    {
+      "epoch": 0.034393809114359415,
+      "grad_norm": 1.8427823781967163,
+      "learning_rate": 9.866666666666668e-05,
+      "loss": 2.7767,
+      "step": 40
+    },
+    {
+      "epoch": 0.051590713671539126,
+      "grad_norm": 1.2594960927963257,
+      "learning_rate": 9.8e-05,
+      "loss": 2.3015,
+      "step": 60
+    },
+    {
+      "epoch": 0.06878761822871883,
+      "grad_norm": 1.5001391172409058,
+      "learning_rate": 9.733333333333335e-05,
+      "loss": 1.9096,
+      "step": 80
+    },
+    {
+      "epoch": 0.08598452278589853,
+      "grad_norm": 1.4705618619918823,
+      "learning_rate": 9.666666666666667e-05,
+      "loss": 1.8592,
+      "step": 100
+    },
+    {
+      "epoch": 0.10318142734307825,
+      "grad_norm": 1.678035020828247,
+      "learning_rate": 9.6e-05,
+      "loss": 1.7618,
+      "step": 120
+    },
+    {
+      "epoch": 0.12037833190025796,
+      "grad_norm": 1.9186018705368042,
+      "learning_rate": 9.533333333333334e-05,
+      "loss": 1.612,
+      "step": 140
+    },
+    {
+      "epoch": 0.13757523645743766,
+      "grad_norm": 2.0859336853027344,
+      "learning_rate": 9.466666666666667e-05,
+      "loss": 1.5829,
+      "step": 160
+    },
+    {
+      "epoch": 0.15477214101461736,
+      "grad_norm": 2.2418243885040283,
+      "learning_rate": 9.4e-05,
+      "loss": 1.6236,
+      "step": 180
+    },
+    {
+      "epoch": 0.17196904557179707,
+      "grad_norm": 2.3599705696105957,
+      "learning_rate": 9.333333333333334e-05,
+      "loss": 1.5204,
+      "step": 200
+    },
+    {
+      "epoch": 0.18916595012897677,
+      "grad_norm": 2.346595525741577,
+      "learning_rate": 9.266666666666666e-05,
+      "loss": 1.4757,
+      "step": 220
+    },
+    {
+      "epoch": 0.2063628546861565,
+      "grad_norm": 2.483389139175415,
+      "learning_rate": 9.200000000000001e-05,
+      "loss": 1.4369,
+      "step": 240
+    },
+    {
+      "epoch": 0.2235597592433362,
+      "grad_norm": 2.320002555847168,
+      "learning_rate": 9.133333333333334e-05,
+      "loss": 1.391,
+      "step": 260
+    },
+    {
+      "epoch": 0.2407566638005159,
+      "grad_norm": 3.222677230834961,
+      "learning_rate": 9.066666666666667e-05,
+      "loss": 1.5182,
+      "step": 280
+    },
+    {
+      "epoch": 0.2579535683576956,
+      "grad_norm": 2.7384626865386963,
+      "learning_rate": 9e-05,
+      "loss": 1.1515,
+      "step": 300
+    },
+    {
+      "epoch": 0.2751504729148753,
+      "grad_norm": 3.28292179107666,
+      "learning_rate": 8.933333333333334e-05,
+      "loss": 1.3981,
+      "step": 320
+    },
+    {
+      "epoch": 0.292347377472055,
+      "grad_norm": 2.6418075561523438,
+      "learning_rate": 8.866666666666668e-05,
+      "loss": 1.448,
+      "step": 340
+    },
+    {
+      "epoch": 0.30954428202923473,
+      "grad_norm": 2.810594081878662,
+      "learning_rate": 8.800000000000001e-05,
+      "loss": 1.1637,
+      "step": 360
+    },
+    {
+      "epoch": 0.32674118658641443,
+      "grad_norm": 2.897336006164551,
+      "learning_rate": 8.733333333333333e-05,
+      "loss": 1.3715,
+      "step": 380
+    },
+    {
+      "epoch": 0.34393809114359414,
+      "grad_norm": 3.5841643810272217,
+      "learning_rate": 8.666666666666667e-05,
+      "loss": 1.3044,
+      "step": 400
+    },
+    {
+      "epoch": 0.36113499570077384,
+      "grad_norm": 3.0653135776519775,
+      "learning_rate": 8.6e-05,
+      "loss": 1.1584,
+      "step": 420
+    },
+    {
+      "epoch": 0.37833190025795355,
+      "grad_norm": 3.761073112487793,
+      "learning_rate": 8.533333333333334e-05,
+      "loss": 1.2224,
+      "step": 440
+    },
+    {
+      "epoch": 0.39552880481513325,
+      "grad_norm": 3.481926441192627,
+      "learning_rate": 8.466666666666667e-05,
+      "loss": 1.1676,
+      "step": 460
+    },
+    {
+      "epoch": 0.412725709372313,
+      "grad_norm": 4.327862739562988,
+      "learning_rate": 8.4e-05,
+      "loss": 1.0294,
+      "step": 480
+    },
+    {
+      "epoch": 0.4299226139294927,
+      "grad_norm": 4.155755996704102,
+      "learning_rate": 8.333333333333334e-05,
+      "loss": 1.2208,
+      "step": 500
+    },
+    {
+      "epoch": 0.4471195184866724,
+      "grad_norm": 3.50590443611145,
+      "learning_rate": 8.266666666666667e-05,
+      "loss": 1.0706,
+      "step": 520
+    },
+    {
+      "epoch": 0.4643164230438521,
+      "grad_norm": 4.00937557220459,
+      "learning_rate": 8.2e-05,
+      "loss": 1.0627,
+      "step": 540
+    },
+    {
+      "epoch": 0.4815133276010318,
+      "grad_norm": 4.476954460144043,
+      "learning_rate": 8.133333333333334e-05,
+      "loss": 1.0246,
+      "step": 560
+    },
+    {
+      "epoch": 0.49871023215821153,
+      "grad_norm": 4.1531476974487305,
+      "learning_rate": 8.066666666666667e-05,
+      "loss": 1.2647,
+      "step": 580
+    },
+    {
+      "epoch": 0.5159071367153912,
+      "grad_norm": 3.9548251628875732,
+      "learning_rate": 8e-05,
+      "loss": 0.9846,
+      "step": 600
+    },
+    {
+      "epoch": 0.5331040412725709,
+      "grad_norm": 4.803060531616211,
+      "learning_rate": 7.933333333333334e-05,
+      "loss": 0.9058,
+      "step": 620
+    },
+    {
+      "epoch": 0.5503009458297506,
+      "grad_norm": 4.116948127746582,
+      "learning_rate": 7.866666666666666e-05,
+      "loss": 1.0455,
+      "step": 640
+    },
+    {
+      "epoch": 0.5674978503869303,
+      "grad_norm": 3.5376293659210205,
+      "learning_rate": 7.800000000000001e-05,
+      "loss": 1.0034,
+      "step": 660
+    },
+    {
+      "epoch": 0.58469475494411,
+      "grad_norm": 5.122928619384766,
+      "learning_rate": 7.733333333333333e-05,
+      "loss": 0.9539,
+      "step": 680
+    },
+    {
+      "epoch": 0.6018916595012898,
+      "grad_norm": 4.396443843841553,
+      "learning_rate": 7.666666666666667e-05,
+      "loss": 1.0106,
+      "step": 700
+    },
+    {
+      "epoch": 0.6190885640584695,
+      "grad_norm": 5.2031989097595215,
+      "learning_rate": 7.6e-05,
+      "loss": 1.1025,
+      "step": 720
+    },
+    {
+      "epoch": 0.6362854686156492,
+      "grad_norm": 4.93772554397583,
+      "learning_rate": 7.533333333333334e-05,
+      "loss": 1.0214,
+      "step": 740
+    },
+    {
+      "epoch": 0.6534823731728289,
+      "grad_norm": 3.970015048980713,
+      "learning_rate": 7.466666666666667e-05,
+      "loss": 0.8724,
+      "step": 760
+    },
+    {
+      "epoch": 0.6706792777300086,
+      "grad_norm": 4.316510200500488,
+      "learning_rate": 7.4e-05,
+      "loss": 0.9296,
+      "step": 780
+    },
+    {
+      "epoch": 0.6878761822871883,
+      "grad_norm": 5.551044464111328,
+      "learning_rate": 7.333333333333333e-05,
+      "loss": 0.9748,
+      "step": 800
+    },
+    {
+      "epoch": 0.705073086844368,
+      "grad_norm": 5.091616630554199,
+      "learning_rate": 7.266666666666667e-05,
+      "loss": 0.9048,
+      "step": 820
+    },
+    {
+      "epoch": 0.7222699914015477,
+      "grad_norm": 5.082363128662109,
+      "learning_rate": 7.2e-05,
+      "loss": 0.9605,
+      "step": 840
+    },
+    {
+      "epoch": 0.7394668959587274,
+      "grad_norm": 4.591577053070068,
+      "learning_rate": 7.133333333333334e-05,
+      "loss": 0.803,
+      "step": 860
+    },
+    {
+      "epoch": 0.7566638005159071,
+      "grad_norm": 3.200929880142212,
+      "learning_rate": 7.066666666666667e-05,
+      "loss": 0.8525,
+      "step": 880
+    },
+    {
+      "epoch": 0.7738607050730868,
+      "grad_norm": 5.56381368637085,
+      "learning_rate": 7e-05,
+      "loss": 0.8088,
+      "step": 900
+    },
+    {
+      "epoch": 0.7910576096302665,
+      "grad_norm": 4.371031761169434,
+      "learning_rate": 6.933333333333334e-05,
+      "loss": 0.811,
+      "step": 920
+    },
+    {
+      "epoch": 0.8082545141874462,
+      "grad_norm": 5.641899585723877,
+      "learning_rate": 6.866666666666666e-05,
+      "loss": 0.8693,
+      "step": 940
+    },
+    {
+      "epoch": 0.825451418744626,
+      "grad_norm": 5.0090436935424805,
+      "learning_rate": 6.800000000000001e-05,
+      "loss": 0.7813,
+      "step": 960
+    },
+    {
+      "epoch": 0.8426483233018057,
+      "grad_norm": 7.000046730041504,
+      "learning_rate": 6.733333333333333e-05,
+      "loss": 0.8189,
+      "step": 980
+    },
+    {
+      "epoch": 0.8598452278589854,
+      "grad_norm": 5.533496856689453,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.8019,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 3000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 5.032995769653658e+16,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

outputs/checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:171c89f8ecd1388fc79e0fbedd3775495c90abd0688b87c0090ac48cd2e89673
+size 5048

outputs/checkpoint-1500/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: beomi/KoAlpaca-Polyglot-5.8B
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.10.1.dev0

outputs/checkpoint-1500/adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "beomi/KoAlpaca-Polyglot-5.8B",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query_key_value"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

outputs/checkpoint-1500/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:29a4bf47701b02beb64e9f1bcda2a54f5a3f30d98aa4f06fc8e00eef754c1bf9
+size 14688200

outputs/checkpoint-1500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:49cca35c49ed496d40d84b0e3f29b557533d44b1a4bcae7758fc5161bf44f583
+size 29407610

outputs/checkpoint-1500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9099a7d64aa04dab7dab41ebf3b6f2489e087b40d1e1d8a7e04f435a8063c0de
+size 14244

outputs/checkpoint-1500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a8d29792e469b6f760fa9178cf78d333450ccf71dfe26bcfa2231bc7242c219
+size 1064

outputs/checkpoint-1500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,558 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.2897678417884781,
+  "eval_steps": 500,
+  "global_step": 1500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.017196904557179708,
+      "grad_norm": 1.7918040752410889,
+      "learning_rate": 9.933333333333334e-05,
+      "loss": 4.1644,
+      "step": 20
+    },
+    {
+      "epoch": 0.034393809114359415,
+      "grad_norm": 1.8427823781967163,
+      "learning_rate": 9.866666666666668e-05,
+      "loss": 2.7767,
+      "step": 40
+    },
+    {
+      "epoch": 0.051590713671539126,
+      "grad_norm": 1.2594960927963257,
+      "learning_rate": 9.8e-05,
+      "loss": 2.3015,
+      "step": 60
+    },
+    {
+      "epoch": 0.06878761822871883,
+      "grad_norm": 1.5001391172409058,
+      "learning_rate": 9.733333333333335e-05,
+      "loss": 1.9096,
+      "step": 80
+    },
+    {
+      "epoch": 0.08598452278589853,
+      "grad_norm": 1.4705618619918823,
+      "learning_rate": 9.666666666666667e-05,
+      "loss": 1.8592,
+      "step": 100
+    },
+    {
+      "epoch": 0.10318142734307825,
+      "grad_norm": 1.678035020828247,
+      "learning_rate": 9.6e-05,
+      "loss": 1.7618,
+      "step": 120
+    },
+    {
+      "epoch": 0.12037833190025796,
+      "grad_norm": 1.9186018705368042,
+      "learning_rate": 9.533333333333334e-05,
+      "loss": 1.612,
+      "step": 140
+    },
+    {
+      "epoch": 0.13757523645743766,
+      "grad_norm": 2.0859336853027344,
+      "learning_rate": 9.466666666666667e-05,
+      "loss": 1.5829,
+      "step": 160
+    },
+    {
+      "epoch": 0.15477214101461736,
+      "grad_norm": 2.2418243885040283,
+      "learning_rate": 9.4e-05,
+      "loss": 1.6236,
+      "step": 180
+    },
+    {
+      "epoch": 0.17196904557179707,
+      "grad_norm": 2.3599705696105957,
+      "learning_rate": 9.333333333333334e-05,
+      "loss": 1.5204,
+      "step": 200
+    },
+    {
+      "epoch": 0.18916595012897677,
+      "grad_norm": 2.346595525741577,
+      "learning_rate": 9.266666666666666e-05,
+      "loss": 1.4757,
+      "step": 220
+    },
+    {
+      "epoch": 0.2063628546861565,
+      "grad_norm": 2.483389139175415,
+      "learning_rate": 9.200000000000001e-05,
+      "loss": 1.4369,
+      "step": 240
+    },
+    {
+      "epoch": 0.2235597592433362,
+      "grad_norm": 2.320002555847168,
+      "learning_rate": 9.133333333333334e-05,
+      "loss": 1.391,
+      "step": 260
+    },
+    {
+      "epoch": 0.2407566638005159,
+      "grad_norm": 3.222677230834961,
+      "learning_rate": 9.066666666666667e-05,
+      "loss": 1.5182,
+      "step": 280
+    },
+    {
+      "epoch": 0.2579535683576956,
+      "grad_norm": 2.7384626865386963,
+      "learning_rate": 9e-05,
+      "loss": 1.1515,
+      "step": 300
+    },
+    {
+      "epoch": 0.2751504729148753,
+      "grad_norm": 3.28292179107666,
+      "learning_rate": 8.933333333333334e-05,
+      "loss": 1.3981,
+      "step": 320
+    },
+    {
+      "epoch": 0.292347377472055,
+      "grad_norm": 2.6418075561523438,
+      "learning_rate": 8.866666666666668e-05,
+      "loss": 1.448,
+      "step": 340
+    },
+    {
+      "epoch": 0.30954428202923473,
+      "grad_norm": 2.810594081878662,
+      "learning_rate": 8.800000000000001e-05,
+      "loss": 1.1637,
+      "step": 360
+    },
+    {
+      "epoch": 0.32674118658641443,
+      "grad_norm": 2.897336006164551,
+      "learning_rate": 8.733333333333333e-05,
+      "loss": 1.3715,
+      "step": 380
+    },
+    {
+      "epoch": 0.34393809114359414,
+      "grad_norm": 3.5841643810272217,
+      "learning_rate": 8.666666666666667e-05,
+      "loss": 1.3044,
+      "step": 400
+    },
+    {
+      "epoch": 0.36113499570077384,
+      "grad_norm": 3.0653135776519775,
+      "learning_rate": 8.6e-05,
+      "loss": 1.1584,
+      "step": 420
+    },
+    {
+      "epoch": 0.37833190025795355,
+      "grad_norm": 3.761073112487793,
+      "learning_rate": 8.533333333333334e-05,
+      "loss": 1.2224,
+      "step": 440
+    },
+    {
+      "epoch": 0.39552880481513325,
+      "grad_norm": 3.481926441192627,
+      "learning_rate": 8.466666666666667e-05,
+      "loss": 1.1676,
+      "step": 460
+    },
+    {
+      "epoch": 0.412725709372313,
+      "grad_norm": 4.327862739562988,
+      "learning_rate": 8.4e-05,
+      "loss": 1.0294,
+      "step": 480
+    },
+    {
+      "epoch": 0.4299226139294927,
+      "grad_norm": 4.155755996704102,
+      "learning_rate": 8.333333333333334e-05,
+      "loss": 1.2208,
+      "step": 500
+    },
+    {
+      "epoch": 0.4471195184866724,
+      "grad_norm": 3.50590443611145,
+      "learning_rate": 8.266666666666667e-05,
+      "loss": 1.0706,
+      "step": 520
+    },
+    {
+      "epoch": 0.4643164230438521,
+      "grad_norm": 4.00937557220459,
+      "learning_rate": 8.2e-05,
+      "loss": 1.0627,
+      "step": 540
+    },
+    {
+      "epoch": 0.4815133276010318,
+      "grad_norm": 4.476954460144043,
+      "learning_rate": 8.133333333333334e-05,
+      "loss": 1.0246,
+      "step": 560
+    },
+    {
+      "epoch": 0.49871023215821153,
+      "grad_norm": 4.1531476974487305,
+      "learning_rate": 8.066666666666667e-05,
+      "loss": 1.2647,
+      "step": 580
+    },
+    {
+      "epoch": 0.5159071367153912,
+      "grad_norm": 3.9548251628875732,
+      "learning_rate": 8e-05,
+      "loss": 0.9846,
+      "step": 600
+    },
+    {
+      "epoch": 0.5331040412725709,
+      "grad_norm": 4.803060531616211,
+      "learning_rate": 7.933333333333334e-05,
+      "loss": 0.9058,
+      "step": 620
+    },
+    {
+      "epoch": 0.5503009458297506,
+      "grad_norm": 4.116948127746582,
+      "learning_rate": 7.866666666666666e-05,
+      "loss": 1.0455,
+      "step": 640
+    },
+    {
+      "epoch": 0.5674978503869303,
+      "grad_norm": 3.5376293659210205,
+      "learning_rate": 7.800000000000001e-05,
+      "loss": 1.0034,
+      "step": 660
+    },
+    {
+      "epoch": 0.58469475494411,
+      "grad_norm": 5.122928619384766,
+      "learning_rate": 7.733333333333333e-05,
+      "loss": 0.9539,
+      "step": 680
+    },
+    {
+      "epoch": 0.6018916595012898,
+      "grad_norm": 4.396443843841553,
+      "learning_rate": 7.666666666666667e-05,
+      "loss": 1.0106,
+      "step": 700
+    },
+    {
+      "epoch": 0.6190885640584695,
+      "grad_norm": 5.2031989097595215,
+      "learning_rate": 7.6e-05,
+      "loss": 1.1025,
+      "step": 720
+    },
+    {
+      "epoch": 0.6362854686156492,
+      "grad_norm": 4.93772554397583,
+      "learning_rate": 7.533333333333334e-05,
+      "loss": 1.0214,
+      "step": 740
+    },
+    {
+      "epoch": 0.6534823731728289,
+      "grad_norm": 3.970015048980713,
+      "learning_rate": 7.466666666666667e-05,
+      "loss": 0.8724,
+      "step": 760
+    },
+    {
+      "epoch": 0.6706792777300086,
+      "grad_norm": 4.316510200500488,
+      "learning_rate": 7.4e-05,
+      "loss": 0.9296,
+      "step": 780
+    },
+    {
+      "epoch": 0.6878761822871883,
+      "grad_norm": 5.551044464111328,
+      "learning_rate": 7.333333333333333e-05,
+      "loss": 0.9748,
+      "step": 800
+    },
+    {
+      "epoch": 0.705073086844368,
+      "grad_norm": 5.091616630554199,
+      "learning_rate": 7.266666666666667e-05,
+      "loss": 0.9048,
+      "step": 820
+    },
+    {
+      "epoch": 0.7222699914015477,
+      "grad_norm": 5.082363128662109,
+      "learning_rate": 7.2e-05,
+      "loss": 0.9605,
+      "step": 840
+    },
+    {
+      "epoch": 0.7394668959587274,
+      "grad_norm": 4.591577053070068,
+      "learning_rate": 7.133333333333334e-05,
+      "loss": 0.803,
+      "step": 860
+    },
+    {
+      "epoch": 0.7566638005159071,
+      "grad_norm": 3.200929880142212,
+      "learning_rate": 7.066666666666667e-05,
+      "loss": 0.8525,
+      "step": 880
+    },
+    {
+      "epoch": 0.7738607050730868,
+      "grad_norm": 5.56381368637085,
+      "learning_rate": 7e-05,
+      "loss": 0.8088,
+      "step": 900
+    },
+    {
+      "epoch": 0.7910576096302665,
+      "grad_norm": 4.371031761169434,
+      "learning_rate": 6.933333333333334e-05,
+      "loss": 0.811,
+      "step": 920
+    },
+    {
+      "epoch": 0.8082545141874462,
+      "grad_norm": 5.641899585723877,
+      "learning_rate": 6.866666666666666e-05,
+      "loss": 0.8693,
+      "step": 940
+    },
+    {
+      "epoch": 0.825451418744626,
+      "grad_norm": 5.0090436935424805,
+      "learning_rate": 6.800000000000001e-05,
+      "loss": 0.7813,
+      "step": 960
+    },
+    {
+      "epoch": 0.8426483233018057,
+      "grad_norm": 7.000046730041504,
+      "learning_rate": 6.733333333333333e-05,
+      "loss": 0.8189,
+      "step": 980
+    },
+    {
+      "epoch": 0.8598452278589854,
+      "grad_norm": 5.533496856689453,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.8019,
+      "step": 1000
+    },
+    {
+      "epoch": 0.8770421324161651,
+      "grad_norm": 5.878244400024414,
+      "learning_rate": 6.6e-05,
+      "loss": 0.7308,
+      "step": 1020
+    },
+    {
+      "epoch": 0.8942390369733448,
+      "grad_norm": 6.347448825836182,
+      "learning_rate": 6.533333333333334e-05,
+      "loss": 0.7523,
+      "step": 1040
+    },
+    {
+      "epoch": 0.9114359415305245,
+      "grad_norm": 5.9593634605407715,
+      "learning_rate": 6.466666666666666e-05,
+      "loss": 0.7736,
+      "step": 1060
+    },
+    {
+      "epoch": 0.9286328460877042,
+      "grad_norm": 5.173058986663818,
+      "learning_rate": 6.400000000000001e-05,
+      "loss": 0.803,
+      "step": 1080
+    },
+    {
+      "epoch": 0.945829750644884,
+      "grad_norm": 6.1787109375,
+      "learning_rate": 6.333333333333333e-05,
+      "loss": 0.7256,
+      "step": 1100
+    },
+    {
+      "epoch": 0.9630266552020637,
+      "grad_norm": 5.627285957336426,
+      "learning_rate": 6.266666666666667e-05,
+      "loss": 0.7492,
+      "step": 1120
+    },
+    {
+      "epoch": 0.9802235597592434,
+      "grad_norm": 5.914905071258545,
+      "learning_rate": 6.2e-05,
+      "loss": 0.6695,
+      "step": 1140
+    },
+    {
+      "epoch": 0.9974204643164231,
+      "grad_norm": 4.068761825561523,
+      "learning_rate": 6.133333333333334e-05,
+      "loss": 0.6607,
+      "step": 1160
+    },
+    {
+      "epoch": 1.0146173688736027,
+      "grad_norm": 5.116635322570801,
+      "learning_rate": 6.066666666666667e-05,
+      "loss": 0.5824,
+      "step": 1180
+    },
+    {
+      "epoch": 1.0318142734307825,
+      "grad_norm": 6.764676570892334,
+      "learning_rate": 6e-05,
+      "loss": 0.6238,
+      "step": 1200
+    },
+    {
+      "epoch": 1.049011177987962,
+      "grad_norm": 3.931511640548706,
+      "learning_rate": 5.9333333333333343e-05,
+      "loss": 0.5651,
+      "step": 1220
+    },
+    {
+      "epoch": 1.0662080825451419,
+      "grad_norm": 9.559135437011719,
+      "learning_rate": 5.866666666666667e-05,
+      "loss": 0.5615,
+      "step": 1240
+    },
+    {
+      "epoch": 1.0834049871023215,
+      "grad_norm": 8.055045127868652,
+      "learning_rate": 5.8e-05,
+      "loss": 0.5606,
+      "step": 1260
+    },
+    {
+      "epoch": 1.1006018916595013,
+      "grad_norm": 6.782190322875977,
+      "learning_rate": 5.7333333333333336e-05,
+      "loss": 0.5776,
+      "step": 1280
+    },
+    {
+      "epoch": 1.117798796216681,
+      "grad_norm": 5.142735004425049,
+      "learning_rate": 5.666666666666667e-05,
+      "loss": 0.5509,
+      "step": 1300
+    },
+    {
+      "epoch": 1.1349957007738607,
+      "grad_norm": 6.010578155517578,
+      "learning_rate": 5.6000000000000006e-05,
+      "loss": 0.5701,
+      "step": 1320
+    },
+    {
+      "epoch": 1.1521926053310405,
+      "grad_norm": 5.171779155731201,
+      "learning_rate": 5.5333333333333334e-05,
+      "loss": 0.5485,
+      "step": 1340
+    },
+    {
+      "epoch": 1.16938950988822,
+      "grad_norm": 5.51332426071167,
+      "learning_rate": 5.466666666666666e-05,
+      "loss": 0.5515,
+      "step": 1360
+    },
+    {
+      "epoch": 1.1865864144454,
+      "grad_norm": 6.2720947265625,
+      "learning_rate": 5.4000000000000005e-05,
+      "loss": 0.5503,
+      "step": 1380
+    },
+    {
+      "epoch": 1.2037833190025795,
+      "grad_norm": 6.498877048492432,
+      "learning_rate": 5.333333333333333e-05,
+      "loss": 0.525,
+      "step": 1400
+    },
+    {
+      "epoch": 1.2209802235597593,
+      "grad_norm": 5.129275321960449,
+      "learning_rate": 5.266666666666666e-05,
+      "loss": 0.4942,
+      "step": 1420
+    },
+    {
+      "epoch": 1.238177128116939,
+      "grad_norm": 4.145976543426514,
+      "learning_rate": 5.2000000000000004e-05,
+      "loss": 0.4961,
+      "step": 1440
+    },
+    {
+      "epoch": 1.2553740326741187,
+      "grad_norm": 6.678504467010498,
+      "learning_rate": 5.133333333333333e-05,
+      "loss": 0.5032,
+      "step": 1460
+    },
+    {
+      "epoch": 1.2725709372312983,
+      "grad_norm": 6.846457481384277,
+      "learning_rate": 5.0666666666666674e-05,
+      "loss": 0.5322,
+      "step": 1480
+    },
+    {
+      "epoch": 1.2897678417884781,
+      "grad_norm": 7.58906888961792,
+      "learning_rate": 5e-05,
+      "loss": 0.477,
+      "step": 1500
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 3000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 7.5643202693333e+16,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

outputs/checkpoint-1500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:171c89f8ecd1388fc79e0fbedd3775495c90abd0688b87c0090ac48cd2e89673
+size 5048

outputs/checkpoint-2000/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: beomi/KoAlpaca-Polyglot-5.8B
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.10.1.dev0

outputs/checkpoint-2000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "beomi/KoAlpaca-Polyglot-5.8B",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query_key_value"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

outputs/checkpoint-2000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:588b9a14baa0400d0166cc28b9ba031da6d5b0669e4d55e3a929389a64015bf3
+size 14688200

outputs/checkpoint-2000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5394c189196c89df39eb2f8ef432b002b6cbc1d7918fdd45f567f0c40e947692
+size 29407610

outputs/checkpoint-2000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d373a5db5ca364e513a20a5d21da42df8c71a4e15e43b0df6d76ef03a7baf1e0
+size 14244

outputs/checkpoint-2000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:340b1ce02209268feb9f9a5d9c012d838d8156c8e947b363b4610ed8e2619af9
+size 1064

outputs/checkpoint-2000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,733 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.7196904557179709,
+  "eval_steps": 500,
+  "global_step": 2000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.017196904557179708,
+      "grad_norm": 1.7918040752410889,
+      "learning_rate": 9.933333333333334e-05,
+      "loss": 4.1644,
+      "step": 20
+    },
+    {
+      "epoch": 0.034393809114359415,
+      "grad_norm": 1.8427823781967163,
+      "learning_rate": 9.866666666666668e-05,
+      "loss": 2.7767,
+      "step": 40
+    },
+    {
+      "epoch": 0.051590713671539126,
+      "grad_norm": 1.2594960927963257,
+      "learning_rate": 9.8e-05,
+      "loss": 2.3015,
+      "step": 60
+    },
+    {
+      "epoch": 0.06878761822871883,
+      "grad_norm": 1.5001391172409058,
+      "learning_rate": 9.733333333333335e-05,
+      "loss": 1.9096,
+      "step": 80
+    },
+    {
+      "epoch": 0.08598452278589853,
+      "grad_norm": 1.4705618619918823,
+      "learning_rate": 9.666666666666667e-05,
+      "loss": 1.8592,
+      "step": 100
+    },
+    {
+      "epoch": 0.10318142734307825,
+      "grad_norm": 1.678035020828247,
+      "learning_rate": 9.6e-05,
+      "loss": 1.7618,
+      "step": 120
+    },
+    {
+      "epoch": 0.12037833190025796,
+      "grad_norm": 1.9186018705368042,
+      "learning_rate": 9.533333333333334e-05,
+      "loss": 1.612,
+      "step": 140
+    },
+    {
+      "epoch": 0.13757523645743766,
+      "grad_norm": 2.0859336853027344,
+      "learning_rate": 9.466666666666667e-05,
+      "loss": 1.5829,
+      "step": 160
+    },
+    {
+      "epoch": 0.15477214101461736,
+      "grad_norm": 2.2418243885040283,
+      "learning_rate": 9.4e-05,
+      "loss": 1.6236,
+      "step": 180
+    },
+    {
+      "epoch": 0.17196904557179707,
+      "grad_norm": 2.3599705696105957,
+      "learning_rate": 9.333333333333334e-05,
+      "loss": 1.5204,
+      "step": 200
+    },
+    {
+      "epoch": 0.18916595012897677,
+      "grad_norm": 2.346595525741577,
+      "learning_rate": 9.266666666666666e-05,
+      "loss": 1.4757,
+      "step": 220
+    },
+    {
+      "epoch": 0.2063628546861565,
+      "grad_norm": 2.483389139175415,
+      "learning_rate": 9.200000000000001e-05,
+      "loss": 1.4369,
+      "step": 240
+    },
+    {
+      "epoch": 0.2235597592433362,
+      "grad_norm": 2.320002555847168,
+      "learning_rate": 9.133333333333334e-05,
+      "loss": 1.391,
+      "step": 260
+    },
+    {
+      "epoch": 0.2407566638005159,
+      "grad_norm": 3.222677230834961,
+      "learning_rate": 9.066666666666667e-05,
+      "loss": 1.5182,
+      "step": 280
+    },
+    {
+      "epoch": 0.2579535683576956,
+      "grad_norm": 2.7384626865386963,
+      "learning_rate": 9e-05,
+      "loss": 1.1515,
+      "step": 300
+    },
+    {
+      "epoch": 0.2751504729148753,
+      "grad_norm": 3.28292179107666,
+      "learning_rate": 8.933333333333334e-05,
+      "loss": 1.3981,
+      "step": 320
+    },
+    {
+      "epoch": 0.292347377472055,
+      "grad_norm": 2.6418075561523438,
+      "learning_rate": 8.866666666666668e-05,
+      "loss": 1.448,
+      "step": 340
+    },
+    {
+      "epoch": 0.30954428202923473,
+      "grad_norm": 2.810594081878662,
+      "learning_rate": 8.800000000000001e-05,
+      "loss": 1.1637,
+      "step": 360
+    },
+    {
+      "epoch": 0.32674118658641443,
+      "grad_norm": 2.897336006164551,
+      "learning_rate": 8.733333333333333e-05,
+      "loss": 1.3715,
+      "step": 380
+    },
+    {
+      "epoch": 0.34393809114359414,
+      "grad_norm": 3.5841643810272217,
+      "learning_rate": 8.666666666666667e-05,
+      "loss": 1.3044,
+      "step": 400
+    },
+    {
+      "epoch": 0.36113499570077384,
+      "grad_norm": 3.0653135776519775,
+      "learning_rate": 8.6e-05,
+      "loss": 1.1584,
+      "step": 420
+    },
+    {
+      "epoch": 0.37833190025795355,
+      "grad_norm": 3.761073112487793,
+      "learning_rate": 8.533333333333334e-05,
+      "loss": 1.2224,
+      "step": 440
+    },
+    {
+      "epoch": 0.39552880481513325,
+      "grad_norm": 3.481926441192627,
+      "learning_rate": 8.466666666666667e-05,
+      "loss": 1.1676,
+      "step": 460
+    },
+    {
+      "epoch": 0.412725709372313,
+      "grad_norm": 4.327862739562988,
+      "learning_rate": 8.4e-05,
+      "loss": 1.0294,
+      "step": 480
+    },
+    {
+      "epoch": 0.4299226139294927,
+      "grad_norm": 4.155755996704102,
+      "learning_rate": 8.333333333333334e-05,
+      "loss": 1.2208,
+      "step": 500
+    },
+    {
+      "epoch": 0.4471195184866724,
+      "grad_norm": 3.50590443611145,
+      "learning_rate": 8.266666666666667e-05,
+      "loss": 1.0706,
+      "step": 520
+    },
+    {
+      "epoch": 0.4643164230438521,
+      "grad_norm": 4.00937557220459,
+      "learning_rate": 8.2e-05,
+      "loss": 1.0627,
+      "step": 540
+    },
+    {
+      "epoch": 0.4815133276010318,
+      "grad_norm": 4.476954460144043,
+      "learning_rate": 8.133333333333334e-05,
+      "loss": 1.0246,
+      "step": 560
+    },
+    {
+      "epoch": 0.49871023215821153,
+      "grad_norm": 4.1531476974487305,
+      "learning_rate": 8.066666666666667e-05,
+      "loss": 1.2647,
+      "step": 580
+    },
+    {
+      "epoch": 0.5159071367153912,
+      "grad_norm": 3.9548251628875732,
+      "learning_rate": 8e-05,
+      "loss": 0.9846,
+      "step": 600
+    },
+    {
+      "epoch": 0.5331040412725709,
+      "grad_norm": 4.803060531616211,
+      "learning_rate": 7.933333333333334e-05,
+      "loss": 0.9058,
+      "step": 620
+    },
+    {
+      "epoch": 0.5503009458297506,
+      "grad_norm": 4.116948127746582,
+      "learning_rate": 7.866666666666666e-05,
+      "loss": 1.0455,
+      "step": 640
+    },
+    {
+      "epoch": 0.5674978503869303,
+      "grad_norm": 3.5376293659210205,
+      "learning_rate": 7.800000000000001e-05,
+      "loss": 1.0034,
+      "step": 660
+    },
+    {
+      "epoch": 0.58469475494411,
+      "grad_norm": 5.122928619384766,
+      "learning_rate": 7.733333333333333e-05,
+      "loss": 0.9539,
+      "step": 680
+    },
+    {
+      "epoch": 0.6018916595012898,
+      "grad_norm": 4.396443843841553,
+      "learning_rate": 7.666666666666667e-05,
+      "loss": 1.0106,
+      "step": 700
+    },
+    {
+      "epoch": 0.6190885640584695,
+      "grad_norm": 5.2031989097595215,
+      "learning_rate": 7.6e-05,
+      "loss": 1.1025,
+      "step": 720
+    },
+    {
+      "epoch": 0.6362854686156492,
+      "grad_norm": 4.93772554397583,
+      "learning_rate": 7.533333333333334e-05,
+      "loss": 1.0214,
+      "step": 740
+    },
+    {
+      "epoch": 0.6534823731728289,
+      "grad_norm": 3.970015048980713,
+      "learning_rate": 7.466666666666667e-05,
+      "loss": 0.8724,
+      "step": 760
+    },
+    {
+      "epoch": 0.6706792777300086,
+      "grad_norm": 4.316510200500488,
+      "learning_rate": 7.4e-05,
+      "loss": 0.9296,
+      "step": 780
+    },
+    {
+      "epoch": 0.6878761822871883,
+      "grad_norm": 5.551044464111328,
+      "learning_rate": 7.333333333333333e-05,
+      "loss": 0.9748,
+      "step": 800
+    },
+    {
+      "epoch": 0.705073086844368,
+      "grad_norm": 5.091616630554199,
+      "learning_rate": 7.266666666666667e-05,
+      "loss": 0.9048,
+      "step": 820
+    },
+    {
+      "epoch": 0.7222699914015477,
+      "grad_norm": 5.082363128662109,
+      "learning_rate": 7.2e-05,
+      "loss": 0.9605,
+      "step": 840
+    },
+    {
+      "epoch": 0.7394668959587274,
+      "grad_norm": 4.591577053070068,
+      "learning_rate": 7.133333333333334e-05,
+      "loss": 0.803,
+      "step": 860
+    },
+    {
+      "epoch": 0.7566638005159071,
+      "grad_norm": 3.200929880142212,
+      "learning_rate": 7.066666666666667e-05,
+      "loss": 0.8525,
+      "step": 880
+    },
+    {
+      "epoch": 0.7738607050730868,
+      "grad_norm": 5.56381368637085,
+      "learning_rate": 7e-05,
+      "loss": 0.8088,
+      "step": 900
+    },
+    {
+      "epoch": 0.7910576096302665,
+      "grad_norm": 4.371031761169434,
+      "learning_rate": 6.933333333333334e-05,
+      "loss": 0.811,
+      "step": 920
+    },
+    {
+      "epoch": 0.8082545141874462,
+      "grad_norm": 5.641899585723877,
+      "learning_rate": 6.866666666666666e-05,
+      "loss": 0.8693,
+      "step": 940
+    },
+    {
+      "epoch": 0.825451418744626,
+      "grad_norm": 5.0090436935424805,
+      "learning_rate": 6.800000000000001e-05,
+      "loss": 0.7813,
+      "step": 960
+    },
+    {
+      "epoch": 0.8426483233018057,
+      "grad_norm": 7.000046730041504,
+      "learning_rate": 6.733333333333333e-05,
+      "loss": 0.8189,
+      "step": 980
+    },
+    {
+      "epoch": 0.8598452278589854,
+      "grad_norm": 5.533496856689453,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.8019,
+      "step": 1000
+    },
+    {
+      "epoch": 0.8770421324161651,
+      "grad_norm": 5.878244400024414,
+      "learning_rate": 6.6e-05,
+      "loss": 0.7308,
+      "step": 1020
+    },
+    {
+      "epoch": 0.8942390369733448,
+      "grad_norm": 6.347448825836182,
+      "learning_rate": 6.533333333333334e-05,
+      "loss": 0.7523,
+      "step": 1040
+    },
+    {
+      "epoch": 0.9114359415305245,
+      "grad_norm": 5.9593634605407715,
+      "learning_rate": 6.466666666666666e-05,
+      "loss": 0.7736,
+      "step": 1060
+    },
+    {
+      "epoch": 0.9286328460877042,
+      "grad_norm": 5.173058986663818,
+      "learning_rate": 6.400000000000001e-05,
+      "loss": 0.803,
+      "step": 1080
+    },
+    {
+      "epoch": 0.945829750644884,
+      "grad_norm": 6.1787109375,
+      "learning_rate": 6.333333333333333e-05,
+      "loss": 0.7256,
+      "step": 1100
+    },
+    {
+      "epoch": 0.9630266552020637,
+      "grad_norm": 5.627285957336426,
+      "learning_rate": 6.266666666666667e-05,
+      "loss": 0.7492,
+      "step": 1120
+    },
+    {
+      "epoch": 0.9802235597592434,
+      "grad_norm": 5.914905071258545,
+      "learning_rate": 6.2e-05,
+      "loss": 0.6695,
+      "step": 1140
+    },
+    {
+      "epoch": 0.9974204643164231,
+      "grad_norm": 4.068761825561523,
+      "learning_rate": 6.133333333333334e-05,
+      "loss": 0.6607,
+      "step": 1160
+    },
+    {
+      "epoch": 1.0146173688736027,
+      "grad_norm": 5.116635322570801,
+      "learning_rate": 6.066666666666667e-05,
+      "loss": 0.5824,
+      "step": 1180
+    },
+    {
+      "epoch": 1.0318142734307825,
+      "grad_norm": 6.764676570892334,
+      "learning_rate": 6e-05,
+      "loss": 0.6238,
+      "step": 1200
+    },
+    {
+      "epoch": 1.049011177987962,
+      "grad_norm": 3.931511640548706,
+      "learning_rate": 5.9333333333333343e-05,
+      "loss": 0.5651,
+      "step": 1220
+    },
+    {
+      "epoch": 1.0662080825451419,
+      "grad_norm": 9.559135437011719,
+      "learning_rate": 5.866666666666667e-05,
+      "loss": 0.5615,
+      "step": 1240
+    },
+    {
+      "epoch": 1.0834049871023215,
+      "grad_norm": 8.055045127868652,
+      "learning_rate": 5.8e-05,
+      "loss": 0.5606,
+      "step": 1260
+    },
+    {
+      "epoch": 1.1006018916595013,
+      "grad_norm": 6.782190322875977,
+      "learning_rate": 5.7333333333333336e-05,
+      "loss": 0.5776,
+      "step": 1280
+    },
+    {
+      "epoch": 1.117798796216681,
+      "grad_norm": 5.142735004425049,
+      "learning_rate": 5.666666666666667e-05,
+      "loss": 0.5509,
+      "step": 1300
+    },
+    {
+      "epoch": 1.1349957007738607,
+      "grad_norm": 6.010578155517578,
+      "learning_rate": 5.6000000000000006e-05,
+      "loss": 0.5701,
+      "step": 1320
+    },
+    {
+      "epoch": 1.1521926053310405,
+      "grad_norm": 5.171779155731201,
+      "learning_rate": 5.5333333333333334e-05,
+      "loss": 0.5485,
+      "step": 1340
+    },
+    {
+      "epoch": 1.16938950988822,
+      "grad_norm": 5.51332426071167,
+      "learning_rate": 5.466666666666666e-05,
+      "loss": 0.5515,
+      "step": 1360
+    },
+    {
+      "epoch": 1.1865864144454,
+      "grad_norm": 6.2720947265625,
+      "learning_rate": 5.4000000000000005e-05,
+      "loss": 0.5503,
+      "step": 1380
+    },
+    {
+      "epoch": 1.2037833190025795,
+      "grad_norm": 6.498877048492432,
+      "learning_rate": 5.333333333333333e-05,
+      "loss": 0.525,
+      "step": 1400
+    },
+    {
+      "epoch": 1.2209802235597593,
+      "grad_norm": 5.129275321960449,
+      "learning_rate": 5.266666666666666e-05,
+      "loss": 0.4942,
+      "step": 1420
+    },
+    {
+      "epoch": 1.238177128116939,
+      "grad_norm": 4.145976543426514,
+      "learning_rate": 5.2000000000000004e-05,
+      "loss": 0.4961,
+      "step": 1440
+    },
+    {
+      "epoch": 1.2553740326741187,
+      "grad_norm": 6.678504467010498,
+      "learning_rate": 5.133333333333333e-05,
+      "loss": 0.5032,
+      "step": 1460
+    },
+    {
+      "epoch": 1.2725709372312983,
+      "grad_norm": 6.846457481384277,
+      "learning_rate": 5.0666666666666674e-05,
+      "loss": 0.5322,
+      "step": 1480
+    },
+    {
+      "epoch": 1.2897678417884781,
+      "grad_norm": 7.58906888961792,
+      "learning_rate": 5e-05,
+      "loss": 0.477,
+      "step": 1500
+    },
+    {
+      "epoch": 1.3069647463456577,
+      "grad_norm": 5.045712947845459,
+      "learning_rate": 4.933333333333334e-05,
+      "loss": 0.5139,
+      "step": 1520
+    },
+    {
+      "epoch": 1.3241616509028376,
+      "grad_norm": 4.599825859069824,
+      "learning_rate": 4.866666666666667e-05,
+      "loss": 0.4997,
+      "step": 1540
+    },
+    {
+      "epoch": 1.3413585554600171,
+      "grad_norm": 5.756386756896973,
+      "learning_rate": 4.8e-05,
+      "loss": 0.4841,
+      "step": 1560
+    },
+    {
+      "epoch": 1.358555460017197,
+      "grad_norm": 4.89516544342041,
+      "learning_rate": 4.7333333333333336e-05,
+      "loss": 0.4183,
+      "step": 1580
+    },
+    {
+      "epoch": 1.3757523645743766,
+      "grad_norm": 4.484691143035889,
+      "learning_rate": 4.666666666666667e-05,
+      "loss": 0.4185,
+      "step": 1600
+    },
+    {
+      "epoch": 1.3929492691315564,
+      "grad_norm": 5.203677654266357,
+      "learning_rate": 4.600000000000001e-05,
+      "loss": 0.4097,
+      "step": 1620
+    },
+    {
+      "epoch": 1.410146173688736,
+      "grad_norm": 6.922574996948242,
+      "learning_rate": 4.5333333333333335e-05,
+      "loss": 0.4367,
+      "step": 1640
+    },
+    {
+      "epoch": 1.4273430782459158,
+      "grad_norm": 5.618770122528076,
+      "learning_rate": 4.466666666666667e-05,
+      "loss": 0.4453,
+      "step": 1660
+    },
+    {
+      "epoch": 1.4445399828030954,
+      "grad_norm": 4.377410888671875,
+      "learning_rate": 4.4000000000000006e-05,
+      "loss": 0.4416,
+      "step": 1680
+    },
+    {
+      "epoch": 1.4617368873602752,
+      "grad_norm": 7.413736343383789,
+      "learning_rate": 4.3333333333333334e-05,
+      "loss": 0.4488,
+      "step": 1700
+    },
+    {
+      "epoch": 1.4789337919174548,
+      "grad_norm": 4.4008049964904785,
+      "learning_rate": 4.266666666666667e-05,
+      "loss": 0.4167,
+      "step": 1720
+    },
+    {
+      "epoch": 1.4961306964746346,
+      "grad_norm": 5.33242130279541,
+      "learning_rate": 4.2e-05,
+      "loss": 0.486,
+      "step": 1740
+    },
+    {
+      "epoch": 1.5133276010318144,
+      "grad_norm": 7.043882369995117,
+      "learning_rate": 4.133333333333333e-05,
+      "loss": 0.407,
+      "step": 1760
+    },
+    {
+      "epoch": 1.530524505588994,
+      "grad_norm": 6.068751335144043,
+      "learning_rate": 4.066666666666667e-05,
+      "loss": 0.3846,
+      "step": 1780
+    },
+    {
+      "epoch": 1.5477214101461736,
+      "grad_norm": 5.452756404876709,
+      "learning_rate": 4e-05,
+      "loss": 0.4327,
+      "step": 1800
+    },
+    {
+      "epoch": 1.5649183147033534,
+      "grad_norm": 3.541025161743164,
+      "learning_rate": 3.933333333333333e-05,
+      "loss": 0.3734,
+      "step": 1820
+    },
+    {
+      "epoch": 1.5821152192605332,
+      "grad_norm": 6.634982585906982,
+      "learning_rate": 3.866666666666667e-05,
+      "loss": 0.3679,
+      "step": 1840
+    },
+    {
+      "epoch": 1.5993121238177128,
+      "grad_norm": 3.89568829536438,
+      "learning_rate": 3.8e-05,
+      "loss": 0.372,
+      "step": 1860
+    },
+    {
+      "epoch": 1.6165090283748924,
+      "grad_norm": 6.27597188949585,
+      "learning_rate": 3.733333333333334e-05,
+      "loss": 0.3971,
+      "step": 1880
+    },
+    {
+      "epoch": 1.6337059329320722,
+      "grad_norm": 3.7944319248199463,
+      "learning_rate": 3.6666666666666666e-05,
+      "loss": 0.3854,
+      "step": 1900
+    },
+    {
+      "epoch": 1.650902837489252,
+      "grad_norm": 4.370260238647461,
+      "learning_rate": 3.6e-05,
+      "loss": 0.3609,
+      "step": 1920
+    },
+    {
+      "epoch": 1.6680997420464316,
+      "grad_norm": 5.236139297485352,
+      "learning_rate": 3.5333333333333336e-05,
+      "loss": 0.3932,
+      "step": 1940
+    },
+    {
+      "epoch": 1.6852966466036112,
+      "grad_norm": 4.053391456604004,
+      "learning_rate": 3.466666666666667e-05,
+      "loss": 0.4073,
+      "step": 1960
+    },
+    {
+      "epoch": 1.702493551160791,
+      "grad_norm": 4.105246543884277,
+      "learning_rate": 3.4000000000000007e-05,
+      "loss": 0.3533,
+      "step": 1980
+    },
+    {
+      "epoch": 1.7196904557179709,
+      "grad_norm": 4.842738151550293,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 0.3529,
+      "step": 2000
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 3000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.0128778067440435e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

outputs/checkpoint-2000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:171c89f8ecd1388fc79e0fbedd3775495c90abd0688b87c0090ac48cd2e89673
+size 5048

outputs/checkpoint-2500/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: beomi/KoAlpaca-Polyglot-5.8B
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.10.1.dev0

outputs/checkpoint-2500/adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "beomi/KoAlpaca-Polyglot-5.8B",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query_key_value"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

outputs/checkpoint-2500/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8fa144cac0d9b728947dd6cb86ebea59e4c2810672dbb654ff6fb65b72084c0
+size 14688200

outputs/checkpoint-2500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2a9cd5929e76765cc535b162fe03157bd6e42ec76a3cdc4098d8e49d72c40713
+size 29407610

outputs/checkpoint-2500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2e3187411aedbf5a177d63fe5663707e8a773b1c11f0a233ea2b88b4f8443ad9
+size 14244

outputs/checkpoint-2500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e5d399912a537bb9db3ba6d796659509ef14a4c254a8f55b8d2f624198afb71a
+size 1064

outputs/checkpoint-2500/trainer_state.json ADDED Viewed

	@@ -0,0 +1,908 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.1496130696474633,
+  "eval_steps": 500,
+  "global_step": 2500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.017196904557179708,
+      "grad_norm": 1.7918040752410889,
+      "learning_rate": 9.933333333333334e-05,
+      "loss": 4.1644,
+      "step": 20
+    },
+    {
+      "epoch": 0.034393809114359415,
+      "grad_norm": 1.8427823781967163,
+      "learning_rate": 9.866666666666668e-05,
+      "loss": 2.7767,
+      "step": 40
+    },
+    {
+      "epoch": 0.051590713671539126,
+      "grad_norm": 1.2594960927963257,
+      "learning_rate": 9.8e-05,
+      "loss": 2.3015,
+      "step": 60
+    },
+    {
+      "epoch": 0.06878761822871883,
+      "grad_norm": 1.5001391172409058,
+      "learning_rate": 9.733333333333335e-05,
+      "loss": 1.9096,
+      "step": 80
+    },
+    {
+      "epoch": 0.08598452278589853,
+      "grad_norm": 1.4705618619918823,
+      "learning_rate": 9.666666666666667e-05,
+      "loss": 1.8592,
+      "step": 100
+    },
+    {
+      "epoch": 0.10318142734307825,
+      "grad_norm": 1.678035020828247,
+      "learning_rate": 9.6e-05,
+      "loss": 1.7618,
+      "step": 120
+    },
+    {
+      "epoch": 0.12037833190025796,
+      "grad_norm": 1.9186018705368042,
+      "learning_rate": 9.533333333333334e-05,
+      "loss": 1.612,
+      "step": 140
+    },
+    {
+      "epoch": 0.13757523645743766,
+      "grad_norm": 2.0859336853027344,
+      "learning_rate": 9.466666666666667e-05,
+      "loss": 1.5829,
+      "step": 160
+    },
+    {
+      "epoch": 0.15477214101461736,
+      "grad_norm": 2.2418243885040283,
+      "learning_rate": 9.4e-05,
+      "loss": 1.6236,
+      "step": 180
+    },
+    {
+      "epoch": 0.17196904557179707,
+      "grad_norm": 2.3599705696105957,
+      "learning_rate": 9.333333333333334e-05,
+      "loss": 1.5204,
+      "step": 200
+    },
+    {
+      "epoch": 0.18916595012897677,
+      "grad_norm": 2.346595525741577,
+      "learning_rate": 9.266666666666666e-05,
+      "loss": 1.4757,
+      "step": 220
+    },
+    {
+      "epoch": 0.2063628546861565,
+      "grad_norm": 2.483389139175415,
+      "learning_rate": 9.200000000000001e-05,
+      "loss": 1.4369,
+      "step": 240
+    },
+    {
+      "epoch": 0.2235597592433362,
+      "grad_norm": 2.320002555847168,
+      "learning_rate": 9.133333333333334e-05,
+      "loss": 1.391,
+      "step": 260
+    },
+    {
+      "epoch": 0.2407566638005159,
+      "grad_norm": 3.222677230834961,
+      "learning_rate": 9.066666666666667e-05,
+      "loss": 1.5182,
+      "step": 280
+    },
+    {
+      "epoch": 0.2579535683576956,
+      "grad_norm": 2.7384626865386963,
+      "learning_rate": 9e-05,
+      "loss": 1.1515,
+      "step": 300
+    },
+    {
+      "epoch": 0.2751504729148753,
+      "grad_norm": 3.28292179107666,
+      "learning_rate": 8.933333333333334e-05,
+      "loss": 1.3981,
+      "step": 320
+    },
+    {
+      "epoch": 0.292347377472055,
+      "grad_norm": 2.6418075561523438,
+      "learning_rate": 8.866666666666668e-05,
+      "loss": 1.448,
+      "step": 340
+    },
+    {
+      "epoch": 0.30954428202923473,
+      "grad_norm": 2.810594081878662,
+      "learning_rate": 8.800000000000001e-05,
+      "loss": 1.1637,
+      "step": 360
+    },
+    {
+      "epoch": 0.32674118658641443,
+      "grad_norm": 2.897336006164551,
+      "learning_rate": 8.733333333333333e-05,
+      "loss": 1.3715,
+      "step": 380
+    },
+    {
+      "epoch": 0.34393809114359414,
+      "grad_norm": 3.5841643810272217,
+      "learning_rate": 8.666666666666667e-05,
+      "loss": 1.3044,
+      "step": 400
+    },
+    {
+      "epoch": 0.36113499570077384,
+      "grad_norm": 3.0653135776519775,
+      "learning_rate": 8.6e-05,
+      "loss": 1.1584,
+      "step": 420
+    },
+    {
+      "epoch": 0.37833190025795355,
+      "grad_norm": 3.761073112487793,
+      "learning_rate": 8.533333333333334e-05,
+      "loss": 1.2224,
+      "step": 440
+    },
+    {
+      "epoch": 0.39552880481513325,
+      "grad_norm": 3.481926441192627,
+      "learning_rate": 8.466666666666667e-05,
+      "loss": 1.1676,
+      "step": 460
+    },
+    {
+      "epoch": 0.412725709372313,
+      "grad_norm": 4.327862739562988,
+      "learning_rate": 8.4e-05,
+      "loss": 1.0294,
+      "step": 480
+    },
+    {
+      "epoch": 0.4299226139294927,
+      "grad_norm": 4.155755996704102,
+      "learning_rate": 8.333333333333334e-05,
+      "loss": 1.2208,
+      "step": 500
+    },
+    {
+      "epoch": 0.4471195184866724,
+      "grad_norm": 3.50590443611145,
+      "learning_rate": 8.266666666666667e-05,
+      "loss": 1.0706,
+      "step": 520
+    },
+    {
+      "epoch": 0.4643164230438521,
+      "grad_norm": 4.00937557220459,
+      "learning_rate": 8.2e-05,
+      "loss": 1.0627,
+      "step": 540
+    },
+    {
+      "epoch": 0.4815133276010318,
+      "grad_norm": 4.476954460144043,
+      "learning_rate": 8.133333333333334e-05,
+      "loss": 1.0246,
+      "step": 560
+    },
+    {
+      "epoch": 0.49871023215821153,
+      "grad_norm": 4.1531476974487305,
+      "learning_rate": 8.066666666666667e-05,
+      "loss": 1.2647,
+      "step": 580
+    },
+    {
+      "epoch": 0.5159071367153912,
+      "grad_norm": 3.9548251628875732,
+      "learning_rate": 8e-05,
+      "loss": 0.9846,
+      "step": 600
+    },
+    {
+      "epoch": 0.5331040412725709,
+      "grad_norm": 4.803060531616211,
+      "learning_rate": 7.933333333333334e-05,
+      "loss": 0.9058,
+      "step": 620
+    },
+    {
+      "epoch": 0.5503009458297506,
+      "grad_norm": 4.116948127746582,
+      "learning_rate": 7.866666666666666e-05,
+      "loss": 1.0455,
+      "step": 640
+    },
+    {
+      "epoch": 0.5674978503869303,
+      "grad_norm": 3.5376293659210205,
+      "learning_rate": 7.800000000000001e-05,
+      "loss": 1.0034,
+      "step": 660
+    },
+    {
+      "epoch": 0.58469475494411,
+      "grad_norm": 5.122928619384766,
+      "learning_rate": 7.733333333333333e-05,
+      "loss": 0.9539,
+      "step": 680
+    },
+    {
+      "epoch": 0.6018916595012898,
+      "grad_norm": 4.396443843841553,
+      "learning_rate": 7.666666666666667e-05,
+      "loss": 1.0106,
+      "step": 700
+    },
+    {
+      "epoch": 0.6190885640584695,
+      "grad_norm": 5.2031989097595215,
+      "learning_rate": 7.6e-05,
+      "loss": 1.1025,
+      "step": 720
+    },
+    {
+      "epoch": 0.6362854686156492,
+      "grad_norm": 4.93772554397583,
+      "learning_rate": 7.533333333333334e-05,
+      "loss": 1.0214,
+      "step": 740
+    },
+    {
+      "epoch": 0.6534823731728289,
+      "grad_norm": 3.970015048980713,
+      "learning_rate": 7.466666666666667e-05,
+      "loss": 0.8724,
+      "step": 760
+    },
+    {
+      "epoch": 0.6706792777300086,
+      "grad_norm": 4.316510200500488,
+      "learning_rate": 7.4e-05,
+      "loss": 0.9296,
+      "step": 780
+    },
+    {
+      "epoch": 0.6878761822871883,
+      "grad_norm": 5.551044464111328,
+      "learning_rate": 7.333333333333333e-05,
+      "loss": 0.9748,
+      "step": 800
+    },
+    {
+      "epoch": 0.705073086844368,
+      "grad_norm": 5.091616630554199,
+      "learning_rate": 7.266666666666667e-05,
+      "loss": 0.9048,
+      "step": 820
+    },
+    {
+      "epoch": 0.7222699914015477,
+      "grad_norm": 5.082363128662109,
+      "learning_rate": 7.2e-05,
+      "loss": 0.9605,
+      "step": 840
+    },
+    {
+      "epoch": 0.7394668959587274,
+      "grad_norm": 4.591577053070068,
+      "learning_rate": 7.133333333333334e-05,
+      "loss": 0.803,
+      "step": 860
+    },
+    {
+      "epoch": 0.7566638005159071,
+      "grad_norm": 3.200929880142212,
+      "learning_rate": 7.066666666666667e-05,
+      "loss": 0.8525,
+      "step": 880
+    },
+    {
+      "epoch": 0.7738607050730868,
+      "grad_norm": 5.56381368637085,
+      "learning_rate": 7e-05,
+      "loss": 0.8088,
+      "step": 900
+    },
+    {
+      "epoch": 0.7910576096302665,
+      "grad_norm": 4.371031761169434,
+      "learning_rate": 6.933333333333334e-05,
+      "loss": 0.811,
+      "step": 920
+    },
+    {
+      "epoch": 0.8082545141874462,
+      "grad_norm": 5.641899585723877,
+      "learning_rate": 6.866666666666666e-05,
+      "loss": 0.8693,
+      "step": 940
+    },
+    {
+      "epoch": 0.825451418744626,
+      "grad_norm": 5.0090436935424805,
+      "learning_rate": 6.800000000000001e-05,
+      "loss": 0.7813,
+      "step": 960
+    },
+    {
+      "epoch": 0.8426483233018057,
+      "grad_norm": 7.000046730041504,
+      "learning_rate": 6.733333333333333e-05,
+      "loss": 0.8189,
+      "step": 980
+    },
+    {
+      "epoch": 0.8598452278589854,
+      "grad_norm": 5.533496856689453,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.8019,
+      "step": 1000
+    },
+    {
+      "epoch": 0.8770421324161651,
+      "grad_norm": 5.878244400024414,
+      "learning_rate": 6.6e-05,
+      "loss": 0.7308,
+      "step": 1020
+    },
+    {
+      "epoch": 0.8942390369733448,
+      "grad_norm": 6.347448825836182,
+      "learning_rate": 6.533333333333334e-05,
+      "loss": 0.7523,
+      "step": 1040
+    },
+    {
+      "epoch": 0.9114359415305245,
+      "grad_norm": 5.9593634605407715,
+      "learning_rate": 6.466666666666666e-05,
+      "loss": 0.7736,
+      "step": 1060
+    },
+    {
+      "epoch": 0.9286328460877042,
+      "grad_norm": 5.173058986663818,
+      "learning_rate": 6.400000000000001e-05,
+      "loss": 0.803,
+      "step": 1080
+    },
+    {
+      "epoch": 0.945829750644884,
+      "grad_norm": 6.1787109375,
+      "learning_rate": 6.333333333333333e-05,
+      "loss": 0.7256,
+      "step": 1100
+    },
+    {
+      "epoch": 0.9630266552020637,
+      "grad_norm": 5.627285957336426,
+      "learning_rate": 6.266666666666667e-05,
+      "loss": 0.7492,
+      "step": 1120
+    },
+    {
+      "epoch": 0.9802235597592434,
+      "grad_norm": 5.914905071258545,
+      "learning_rate": 6.2e-05,
+      "loss": 0.6695,
+      "step": 1140
+    },
+    {
+      "epoch": 0.9974204643164231,
+      "grad_norm": 4.068761825561523,
+      "learning_rate": 6.133333333333334e-05,
+      "loss": 0.6607,
+      "step": 1160
+    },
+    {
+      "epoch": 1.0146173688736027,
+      "grad_norm": 5.116635322570801,
+      "learning_rate": 6.066666666666667e-05,
+      "loss": 0.5824,
+      "step": 1180
+    },
+    {
+      "epoch": 1.0318142734307825,
+      "grad_norm": 6.764676570892334,
+      "learning_rate": 6e-05,
+      "loss": 0.6238,
+      "step": 1200
+    },
+    {
+      "epoch": 1.049011177987962,
+      "grad_norm": 3.931511640548706,
+      "learning_rate": 5.9333333333333343e-05,
+      "loss": 0.5651,
+      "step": 1220
+    },
+    {
+      "epoch": 1.0662080825451419,
+      "grad_norm": 9.559135437011719,
+      "learning_rate": 5.866666666666667e-05,
+      "loss": 0.5615,
+      "step": 1240
+    },
+    {
+      "epoch": 1.0834049871023215,
+      "grad_norm": 8.055045127868652,
+      "learning_rate": 5.8e-05,
+      "loss": 0.5606,
+      "step": 1260
+    },
+    {
+      "epoch": 1.1006018916595013,
+      "grad_norm": 6.782190322875977,
+      "learning_rate": 5.7333333333333336e-05,
+      "loss": 0.5776,
+      "step": 1280
+    },
+    {
+      "epoch": 1.117798796216681,
+      "grad_norm": 5.142735004425049,
+      "learning_rate": 5.666666666666667e-05,
+      "loss": 0.5509,
+      "step": 1300
+    },
+    {
+      "epoch": 1.1349957007738607,
+      "grad_norm": 6.010578155517578,
+      "learning_rate": 5.6000000000000006e-05,
+      "loss": 0.5701,
+      "step": 1320
+    },
+    {
+      "epoch": 1.1521926053310405,
+      "grad_norm": 5.171779155731201,
+      "learning_rate": 5.5333333333333334e-05,
+      "loss": 0.5485,
+      "step": 1340
+    },
+    {
+      "epoch": 1.16938950988822,
+      "grad_norm": 5.51332426071167,
+      "learning_rate": 5.466666666666666e-05,
+      "loss": 0.5515,
+      "step": 1360
+    },
+    {
+      "epoch": 1.1865864144454,
+      "grad_norm": 6.2720947265625,
+      "learning_rate": 5.4000000000000005e-05,
+      "loss": 0.5503,
+      "step": 1380
+    },
+    {
+      "epoch": 1.2037833190025795,
+      "grad_norm": 6.498877048492432,
+      "learning_rate": 5.333333333333333e-05,
+      "loss": 0.525,
+      "step": 1400
+    },
+    {
+      "epoch": 1.2209802235597593,
+      "grad_norm": 5.129275321960449,
+      "learning_rate": 5.266666666666666e-05,
+      "loss": 0.4942,
+      "step": 1420
+    },
+    {
+      "epoch": 1.238177128116939,
+      "grad_norm": 4.145976543426514,
+      "learning_rate": 5.2000000000000004e-05,
+      "loss": 0.4961,
+      "step": 1440
+    },
+    {
+      "epoch": 1.2553740326741187,
+      "grad_norm": 6.678504467010498,
+      "learning_rate": 5.133333333333333e-05,
+      "loss": 0.5032,
+      "step": 1460
+    },
+    {
+      "epoch": 1.2725709372312983,
+      "grad_norm": 6.846457481384277,
+      "learning_rate": 5.0666666666666674e-05,
+      "loss": 0.5322,
+      "step": 1480
+    },
+    {
+      "epoch": 1.2897678417884781,
+      "grad_norm": 7.58906888961792,
+      "learning_rate": 5e-05,
+      "loss": 0.477,
+      "step": 1500
+    },
+    {
+      "epoch": 1.3069647463456577,
+      "grad_norm": 5.045712947845459,
+      "learning_rate": 4.933333333333334e-05,
+      "loss": 0.5139,
+      "step": 1520
+    },
+    {
+      "epoch": 1.3241616509028376,
+      "grad_norm": 4.599825859069824,
+      "learning_rate": 4.866666666666667e-05,
+      "loss": 0.4997,
+      "step": 1540
+    },
+    {
+      "epoch": 1.3413585554600171,
+      "grad_norm": 5.756386756896973,
+      "learning_rate": 4.8e-05,
+      "loss": 0.4841,
+      "step": 1560
+    },
+    {
+      "epoch": 1.358555460017197,
+      "grad_norm": 4.89516544342041,
+      "learning_rate": 4.7333333333333336e-05,
+      "loss": 0.4183,
+      "step": 1580
+    },
+    {
+      "epoch": 1.3757523645743766,
+      "grad_norm": 4.484691143035889,
+      "learning_rate": 4.666666666666667e-05,
+      "loss": 0.4185,
+      "step": 1600
+    },
+    {
+      "epoch": 1.3929492691315564,
+      "grad_norm": 5.203677654266357,
+      "learning_rate": 4.600000000000001e-05,
+      "loss": 0.4097,
+      "step": 1620
+    },
+    {
+      "epoch": 1.410146173688736,
+      "grad_norm": 6.922574996948242,
+      "learning_rate": 4.5333333333333335e-05,
+      "loss": 0.4367,
+      "step": 1640
+    },
+    {
+      "epoch": 1.4273430782459158,
+      "grad_norm": 5.618770122528076,
+      "learning_rate": 4.466666666666667e-05,
+      "loss": 0.4453,
+      "step": 1660
+    },
+    {
+      "epoch": 1.4445399828030954,
+      "grad_norm": 4.377410888671875,
+      "learning_rate": 4.4000000000000006e-05,
+      "loss": 0.4416,
+      "step": 1680
+    },
+    {
+      "epoch": 1.4617368873602752,
+      "grad_norm": 7.413736343383789,
+      "learning_rate": 4.3333333333333334e-05,
+      "loss": 0.4488,
+      "step": 1700
+    },
+    {
+      "epoch": 1.4789337919174548,
+      "grad_norm": 4.4008049964904785,
+      "learning_rate": 4.266666666666667e-05,
+      "loss": 0.4167,
+      "step": 1720
+    },
+    {
+      "epoch": 1.4961306964746346,
+      "grad_norm": 5.33242130279541,
+      "learning_rate": 4.2e-05,
+      "loss": 0.486,
+      "step": 1740
+    },
+    {
+      "epoch": 1.5133276010318144,
+      "grad_norm": 7.043882369995117,
+      "learning_rate": 4.133333333333333e-05,
+      "loss": 0.407,
+      "step": 1760
+    },
+    {
+      "epoch": 1.530524505588994,
+      "grad_norm": 6.068751335144043,
+      "learning_rate": 4.066666666666667e-05,
+      "loss": 0.3846,
+      "step": 1780
+    },
+    {
+      "epoch": 1.5477214101461736,
+      "grad_norm": 5.452756404876709,
+      "learning_rate": 4e-05,
+      "loss": 0.4327,
+      "step": 1800
+    },
+    {
+      "epoch": 1.5649183147033534,
+      "grad_norm": 3.541025161743164,
+      "learning_rate": 3.933333333333333e-05,
+      "loss": 0.3734,
+      "step": 1820
+    },
+    {
+      "epoch": 1.5821152192605332,
+      "grad_norm": 6.634982585906982,
+      "learning_rate": 3.866666666666667e-05,
+      "loss": 0.3679,
+      "step": 1840
+    },
+    {
+      "epoch": 1.5993121238177128,
+      "grad_norm": 3.89568829536438,
+      "learning_rate": 3.8e-05,
+      "loss": 0.372,
+      "step": 1860
+    },
+    {
+      "epoch": 1.6165090283748924,
+      "grad_norm": 6.27597188949585,
+      "learning_rate": 3.733333333333334e-05,
+      "loss": 0.3971,
+      "step": 1880
+    },
+    {
+      "epoch": 1.6337059329320722,
+      "grad_norm": 3.7944319248199463,
+      "learning_rate": 3.6666666666666666e-05,
+      "loss": 0.3854,
+      "step": 1900
+    },
+    {
+      "epoch": 1.650902837489252,
+      "grad_norm": 4.370260238647461,
+      "learning_rate": 3.6e-05,
+      "loss": 0.3609,
+      "step": 1920
+    },
+    {
+      "epoch": 1.6680997420464316,
+      "grad_norm": 5.236139297485352,
+      "learning_rate": 3.5333333333333336e-05,
+      "loss": 0.3932,
+      "step": 1940
+    },
+    {
+      "epoch": 1.6852966466036112,
+      "grad_norm": 4.053391456604004,
+      "learning_rate": 3.466666666666667e-05,
+      "loss": 0.4073,
+      "step": 1960
+    },
+    {
+      "epoch": 1.702493551160791,
+      "grad_norm": 4.105246543884277,
+      "learning_rate": 3.4000000000000007e-05,
+      "loss": 0.3533,
+      "step": 1980
+    },
+    {
+      "epoch": 1.7196904557179709,
+      "grad_norm": 4.842738151550293,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 0.3529,
+      "step": 2000
+    },
+    {
+      "epoch": 1.7368873602751504,
+      "grad_norm": 6.755301475524902,
+      "learning_rate": 3.266666666666667e-05,
+      "loss": 0.4043,
+      "step": 2020
+    },
+    {
+      "epoch": 1.75408426483233,
+      "grad_norm": 5.9597578048706055,
+      "learning_rate": 3.2000000000000005e-05,
+      "loss": 0.3603,
+      "step": 2040
+    },
+    {
+      "epoch": 1.7712811693895099,
+      "grad_norm": 3.4720447063446045,
+      "learning_rate": 3.1333333333333334e-05,
+      "loss": 0.3367,
+      "step": 2060
+    },
+    {
+      "epoch": 1.7884780739466897,
+      "grad_norm": 3.3950035572052,
+      "learning_rate": 3.066666666666667e-05,
+      "loss": 0.3333,
+      "step": 2080
+    },
+    {
+      "epoch": 1.8056749785038693,
+      "grad_norm": 3.364729642868042,
+      "learning_rate": 3e-05,
+      "loss": 0.3416,
+      "step": 2100
+    },
+    {
+      "epoch": 1.8228718830610489,
+      "grad_norm": 5.35959005355835,
+      "learning_rate": 2.9333333333333336e-05,
+      "loss": 0.3487,
+      "step": 2120
+    },
+    {
+      "epoch": 1.8400687876182287,
+      "grad_norm": 4.672595500946045,
+      "learning_rate": 2.8666666666666668e-05,
+      "loss": 0.3225,
+      "step": 2140
+    },
+    {
+      "epoch": 1.8572656921754085,
+      "grad_norm": 4.4767327308654785,
+      "learning_rate": 2.8000000000000003e-05,
+      "loss": 0.3306,
+      "step": 2160
+    },
+    {
+      "epoch": 1.874462596732588,
+      "grad_norm": 5.055034637451172,
+      "learning_rate": 2.733333333333333e-05,
+      "loss": 0.392,
+      "step": 2180
+    },
+    {
+      "epoch": 1.8916595012897677,
+      "grad_norm": 4.375268936157227,
+      "learning_rate": 2.6666666666666667e-05,
+      "loss": 0.3205,
+      "step": 2200
+    },
+    {
+      "epoch": 1.9088564058469477,
+      "grad_norm": 5.488368988037109,
+      "learning_rate": 2.6000000000000002e-05,
+      "loss": 0.3111,
+      "step": 2220
+    },
+    {
+      "epoch": 1.9260533104041273,
+      "grad_norm": 4.405417442321777,
+      "learning_rate": 2.5333333333333337e-05,
+      "loss": 0.3014,
+      "step": 2240
+    },
+    {
+      "epoch": 1.943250214961307,
+      "grad_norm": 3.466012716293335,
+      "learning_rate": 2.466666666666667e-05,
+      "loss": 0.2952,
+      "step": 2260
+    },
+    {
+      "epoch": 1.9604471195184867,
+      "grad_norm": 3.681208848953247,
+      "learning_rate": 2.4e-05,
+      "loss": 0.3012,
+      "step": 2280
+    },
+    {
+      "epoch": 1.9776440240756665,
+      "grad_norm": 6.128725528717041,
+      "learning_rate": 2.3333333333333336e-05,
+      "loss": 0.3056,
+      "step": 2300
+    },
+    {
+      "epoch": 1.9948409286328461,
+      "grad_norm": 4.9947333335876465,
+      "learning_rate": 2.2666666666666668e-05,
+      "loss": 0.3026,
+      "step": 2320
+    },
+    {
+      "epoch": 2.0120378331900257,
+      "grad_norm": 4.199079990386963,
+      "learning_rate": 2.2033333333333335e-05,
+      "loss": 0.3554,
+      "step": 2340
+    },
+    {
+      "epoch": 2.0292347377472053,
+      "grad_norm": 3.375105381011963,
+      "learning_rate": 2.1366666666666667e-05,
+      "loss": 0.2795,
+      "step": 2360
+    },
+    {
+      "epoch": 2.0464316423043853,
+      "grad_norm": 4.369241237640381,
+      "learning_rate": 2.07e-05,
+      "loss": 0.2719,
+      "step": 2380
+    },
+    {
+      "epoch": 2.063628546861565,
+      "grad_norm": 3.1906027793884277,
+      "learning_rate": 2.0033333333333334e-05,
+      "loss": 0.2681,
+      "step": 2400
+    },
+    {
+      "epoch": 2.0808254514187445,
+      "grad_norm": 3.7493109703063965,
+      "learning_rate": 1.9366666666666665e-05,
+      "loss": 0.2765,
+      "step": 2420
+    },
+    {
+      "epoch": 2.098022355975924,
+      "grad_norm": 4.610039234161377,
+      "learning_rate": 1.87e-05,
+      "loss": 0.2734,
+      "step": 2440
+    },
+    {
+      "epoch": 2.115219260533104,
+      "grad_norm": 3.996819019317627,
+      "learning_rate": 1.8033333333333336e-05,
+      "loss": 0.2732,
+      "step": 2460
+    },
+    {
+      "epoch": 2.1324161650902838,
+      "grad_norm": 3.3538951873779297,
+      "learning_rate": 1.7366666666666668e-05,
+      "loss": 0.2796,
+      "step": 2480
+    },
+    {
+      "epoch": 2.1496130696474633,
+      "grad_norm": 2.914961814880371,
+      "learning_rate": 1.6700000000000003e-05,
+      "loss": 0.2572,
+      "step": 2500
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 3000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.2667360205468467e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

outputs/checkpoint-2500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:171c89f8ecd1388fc79e0fbedd3775495c90abd0688b87c0090ac48cd2e89673
+size 5048

outputs/checkpoint-3000/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: beomi/KoAlpaca-Polyglot-5.8B
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.10.1.dev0

outputs/checkpoint-3000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "beomi/KoAlpaca-Polyglot-5.8B",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query_key_value"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

outputs/checkpoint-3000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db593c2140cd43e4033c707b6e9a19433a0a2d1df3c914c8746516b23ad7af7f
+size 14688200

outputs/checkpoint-3000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0a536873239f5db0e1c24bd10c7eeb633c31ec0158b0ee4d8e9fd3772f31ad74
+size 29407610

outputs/checkpoint-3000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5a96ab8c233d52233c7ea63b8dc1a53f95430afa3fd6199266b3c73a5e02ed94
+size 14244

outputs/checkpoint-3000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb947258805e739261138072bcb7d19e2f267d669994c54218eeb948afbf13ad
+size 1064

outputs/checkpoint-3000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1083 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.5795356835769563,
+  "eval_steps": 500,
+  "global_step": 3000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.017196904557179708,
+      "grad_norm": 1.7918040752410889,
+      "learning_rate": 9.933333333333334e-05,
+      "loss": 4.1644,
+      "step": 20
+    },
+    {
+      "epoch": 0.034393809114359415,
+      "grad_norm": 1.8427823781967163,
+      "learning_rate": 9.866666666666668e-05,
+      "loss": 2.7767,
+      "step": 40
+    },
+    {
+      "epoch": 0.051590713671539126,
+      "grad_norm": 1.2594960927963257,
+      "learning_rate": 9.8e-05,
+      "loss": 2.3015,
+      "step": 60
+    },
+    {
+      "epoch": 0.06878761822871883,
+      "grad_norm": 1.5001391172409058,
+      "learning_rate": 9.733333333333335e-05,
+      "loss": 1.9096,
+      "step": 80
+    },
+    {
+      "epoch": 0.08598452278589853,
+      "grad_norm": 1.4705618619918823,
+      "learning_rate": 9.666666666666667e-05,
+      "loss": 1.8592,
+      "step": 100
+    },
+    {
+      "epoch": 0.10318142734307825,
+      "grad_norm": 1.678035020828247,
+      "learning_rate": 9.6e-05,
+      "loss": 1.7618,
+      "step": 120
+    },
+    {
+      "epoch": 0.12037833190025796,
+      "grad_norm": 1.9186018705368042,
+      "learning_rate": 9.533333333333334e-05,
+      "loss": 1.612,
+      "step": 140
+    },
+    {
+      "epoch": 0.13757523645743766,
+      "grad_norm": 2.0859336853027344,
+      "learning_rate": 9.466666666666667e-05,
+      "loss": 1.5829,
+      "step": 160
+    },
+    {
+      "epoch": 0.15477214101461736,
+      "grad_norm": 2.2418243885040283,
+      "learning_rate": 9.4e-05,
+      "loss": 1.6236,
+      "step": 180
+    },
+    {
+      "epoch": 0.17196904557179707,
+      "grad_norm": 2.3599705696105957,
+      "learning_rate": 9.333333333333334e-05,
+      "loss": 1.5204,
+      "step": 200
+    },
+    {
+      "epoch": 0.18916595012897677,
+      "grad_norm": 2.346595525741577,
+      "learning_rate": 9.266666666666666e-05,
+      "loss": 1.4757,
+      "step": 220
+    },
+    {
+      "epoch": 0.2063628546861565,
+      "grad_norm": 2.483389139175415,
+      "learning_rate": 9.200000000000001e-05,
+      "loss": 1.4369,
+      "step": 240
+    },
+    {
+      "epoch": 0.2235597592433362,
+      "grad_norm": 2.320002555847168,
+      "learning_rate": 9.133333333333334e-05,
+      "loss": 1.391,
+      "step": 260
+    },
+    {
+      "epoch": 0.2407566638005159,
+      "grad_norm": 3.222677230834961,
+      "learning_rate": 9.066666666666667e-05,
+      "loss": 1.5182,
+      "step": 280
+    },
+    {
+      "epoch": 0.2579535683576956,
+      "grad_norm": 2.7384626865386963,
+      "learning_rate": 9e-05,
+      "loss": 1.1515,
+      "step": 300
+    },
+    {
+      "epoch": 0.2751504729148753,
+      "grad_norm": 3.28292179107666,
+      "learning_rate": 8.933333333333334e-05,
+      "loss": 1.3981,
+      "step": 320
+    },
+    {
+      "epoch": 0.292347377472055,
+      "grad_norm": 2.6418075561523438,
+      "learning_rate": 8.866666666666668e-05,
+      "loss": 1.448,
+      "step": 340
+    },
+    {
+      "epoch": 0.30954428202923473,
+      "grad_norm": 2.810594081878662,
+      "learning_rate": 8.800000000000001e-05,
+      "loss": 1.1637,
+      "step": 360
+    },
+    {
+      "epoch": 0.32674118658641443,
+      "grad_norm": 2.897336006164551,
+      "learning_rate": 8.733333333333333e-05,
+      "loss": 1.3715,
+      "step": 380
+    },
+    {
+      "epoch": 0.34393809114359414,
+      "grad_norm": 3.5841643810272217,
+      "learning_rate": 8.666666666666667e-05,
+      "loss": 1.3044,
+      "step": 400
+    },
+    {
+      "epoch": 0.36113499570077384,
+      "grad_norm": 3.0653135776519775,
+      "learning_rate": 8.6e-05,
+      "loss": 1.1584,
+      "step": 420
+    },
+    {
+      "epoch": 0.37833190025795355,
+      "grad_norm": 3.761073112487793,
+      "learning_rate": 8.533333333333334e-05,
+      "loss": 1.2224,
+      "step": 440
+    },
+    {
+      "epoch": 0.39552880481513325,
+      "grad_norm": 3.481926441192627,
+      "learning_rate": 8.466666666666667e-05,
+      "loss": 1.1676,
+      "step": 460
+    },
+    {
+      "epoch": 0.412725709372313,
+      "grad_norm": 4.327862739562988,
+      "learning_rate": 8.4e-05,
+      "loss": 1.0294,
+      "step": 480
+    },
+    {
+      "epoch": 0.4299226139294927,
+      "grad_norm": 4.155755996704102,
+      "learning_rate": 8.333333333333334e-05,
+      "loss": 1.2208,
+      "step": 500
+    },
+    {
+      "epoch": 0.4471195184866724,
+      "grad_norm": 3.50590443611145,
+      "learning_rate": 8.266666666666667e-05,
+      "loss": 1.0706,
+      "step": 520
+    },
+    {
+      "epoch": 0.4643164230438521,
+      "grad_norm": 4.00937557220459,
+      "learning_rate": 8.2e-05,
+      "loss": 1.0627,
+      "step": 540
+    },
+    {
+      "epoch": 0.4815133276010318,
+      "grad_norm": 4.476954460144043,
+      "learning_rate": 8.133333333333334e-05,
+      "loss": 1.0246,
+      "step": 560
+    },
+    {
+      "epoch": 0.49871023215821153,
+      "grad_norm": 4.1531476974487305,
+      "learning_rate": 8.066666666666667e-05,
+      "loss": 1.2647,
+      "step": 580
+    },
+    {
+      "epoch": 0.5159071367153912,
+      "grad_norm": 3.9548251628875732,
+      "learning_rate": 8e-05,
+      "loss": 0.9846,
+      "step": 600
+    },
+    {
+      "epoch": 0.5331040412725709,
+      "grad_norm": 4.803060531616211,
+      "learning_rate": 7.933333333333334e-05,
+      "loss": 0.9058,
+      "step": 620
+    },
+    {
+      "epoch": 0.5503009458297506,
+      "grad_norm": 4.116948127746582,
+      "learning_rate": 7.866666666666666e-05,
+      "loss": 1.0455,
+      "step": 640
+    },
+    {
+      "epoch": 0.5674978503869303,
+      "grad_norm": 3.5376293659210205,
+      "learning_rate": 7.800000000000001e-05,
+      "loss": 1.0034,
+      "step": 660
+    },
+    {
+      "epoch": 0.58469475494411,
+      "grad_norm": 5.122928619384766,
+      "learning_rate": 7.733333333333333e-05,
+      "loss": 0.9539,
+      "step": 680
+    },
+    {
+      "epoch": 0.6018916595012898,
+      "grad_norm": 4.396443843841553,
+      "learning_rate": 7.666666666666667e-05,
+      "loss": 1.0106,
+      "step": 700
+    },
+    {
+      "epoch": 0.6190885640584695,
+      "grad_norm": 5.2031989097595215,
+      "learning_rate": 7.6e-05,
+      "loss": 1.1025,
+      "step": 720
+    },
+    {
+      "epoch": 0.6362854686156492,
+      "grad_norm": 4.93772554397583,
+      "learning_rate": 7.533333333333334e-05,
+      "loss": 1.0214,
+      "step": 740
+    },
+    {
+      "epoch": 0.6534823731728289,
+      "grad_norm": 3.970015048980713,
+      "learning_rate": 7.466666666666667e-05,
+      "loss": 0.8724,
+      "step": 760
+    },
+    {
+      "epoch": 0.6706792777300086,
+      "grad_norm": 4.316510200500488,
+      "learning_rate": 7.4e-05,
+      "loss": 0.9296,
+      "step": 780
+    },
+    {
+      "epoch": 0.6878761822871883,
+      "grad_norm": 5.551044464111328,
+      "learning_rate": 7.333333333333333e-05,
+      "loss": 0.9748,
+      "step": 800
+    },
+    {
+      "epoch": 0.705073086844368,
+      "grad_norm": 5.091616630554199,
+      "learning_rate": 7.266666666666667e-05,
+      "loss": 0.9048,
+      "step": 820
+    },
+    {
+      "epoch": 0.7222699914015477,
+      "grad_norm": 5.082363128662109,
+      "learning_rate": 7.2e-05,
+      "loss": 0.9605,
+      "step": 840
+    },
+    {
+      "epoch": 0.7394668959587274,
+      "grad_norm": 4.591577053070068,
+      "learning_rate": 7.133333333333334e-05,
+      "loss": 0.803,
+      "step": 860
+    },
+    {
+      "epoch": 0.7566638005159071,
+      "grad_norm": 3.200929880142212,
+      "learning_rate": 7.066666666666667e-05,
+      "loss": 0.8525,
+      "step": 880
+    },
+    {
+      "epoch": 0.7738607050730868,
+      "grad_norm": 5.56381368637085,
+      "learning_rate": 7e-05,
+      "loss": 0.8088,
+      "step": 900
+    },
+    {
+      "epoch": 0.7910576096302665,
+      "grad_norm": 4.371031761169434,
+      "learning_rate": 6.933333333333334e-05,
+      "loss": 0.811,
+      "step": 920
+    },
+    {
+      "epoch": 0.8082545141874462,
+      "grad_norm": 5.641899585723877,
+      "learning_rate": 6.866666666666666e-05,
+      "loss": 0.8693,
+      "step": 940
+    },
+    {
+      "epoch": 0.825451418744626,
+      "grad_norm": 5.0090436935424805,
+      "learning_rate": 6.800000000000001e-05,
+      "loss": 0.7813,
+      "step": 960
+    },
+    {
+      "epoch": 0.8426483233018057,
+      "grad_norm": 7.000046730041504,
+      "learning_rate": 6.733333333333333e-05,
+      "loss": 0.8189,
+      "step": 980
+    },
+    {
+      "epoch": 0.8598452278589854,
+      "grad_norm": 5.533496856689453,
+      "learning_rate": 6.666666666666667e-05,
+      "loss": 0.8019,
+      "step": 1000
+    },
+    {
+      "epoch": 0.8770421324161651,
+      "grad_norm": 5.878244400024414,
+      "learning_rate": 6.6e-05,
+      "loss": 0.7308,
+      "step": 1020
+    },
+    {
+      "epoch": 0.8942390369733448,
+      "grad_norm": 6.347448825836182,
+      "learning_rate": 6.533333333333334e-05,
+      "loss": 0.7523,
+      "step": 1040
+    },
+    {
+      "epoch": 0.9114359415305245,
+      "grad_norm": 5.9593634605407715,
+      "learning_rate": 6.466666666666666e-05,
+      "loss": 0.7736,
+      "step": 1060
+    },
+    {
+      "epoch": 0.9286328460877042,
+      "grad_norm": 5.173058986663818,
+      "learning_rate": 6.400000000000001e-05,
+      "loss": 0.803,
+      "step": 1080
+    },
+    {
+      "epoch": 0.945829750644884,
+      "grad_norm": 6.1787109375,
+      "learning_rate": 6.333333333333333e-05,
+      "loss": 0.7256,
+      "step": 1100
+    },
+    {
+      "epoch": 0.9630266552020637,
+      "grad_norm": 5.627285957336426,
+      "learning_rate": 6.266666666666667e-05,
+      "loss": 0.7492,
+      "step": 1120
+    },
+    {
+      "epoch": 0.9802235597592434,
+      "grad_norm": 5.914905071258545,
+      "learning_rate": 6.2e-05,
+      "loss": 0.6695,
+      "step": 1140
+    },
+    {
+      "epoch": 0.9974204643164231,
+      "grad_norm": 4.068761825561523,
+      "learning_rate": 6.133333333333334e-05,
+      "loss": 0.6607,
+      "step": 1160
+    },
+    {
+      "epoch": 1.0146173688736027,
+      "grad_norm": 5.116635322570801,
+      "learning_rate": 6.066666666666667e-05,
+      "loss": 0.5824,
+      "step": 1180
+    },
+    {
+      "epoch": 1.0318142734307825,
+      "grad_norm": 6.764676570892334,
+      "learning_rate": 6e-05,
+      "loss": 0.6238,
+      "step": 1200
+    },
+    {
+      "epoch": 1.049011177987962,
+      "grad_norm": 3.931511640548706,
+      "learning_rate": 5.9333333333333343e-05,
+      "loss": 0.5651,
+      "step": 1220
+    },
+    {
+      "epoch": 1.0662080825451419,
+      "grad_norm": 9.559135437011719,
+      "learning_rate": 5.866666666666667e-05,
+      "loss": 0.5615,
+      "step": 1240
+    },
+    {
+      "epoch": 1.0834049871023215,
+      "grad_norm": 8.055045127868652,
+      "learning_rate": 5.8e-05,
+      "loss": 0.5606,
+      "step": 1260
+    },
+    {
+      "epoch": 1.1006018916595013,
+      "grad_norm": 6.782190322875977,
+      "learning_rate": 5.7333333333333336e-05,
+      "loss": 0.5776,
+      "step": 1280
+    },
+    {
+      "epoch": 1.117798796216681,
+      "grad_norm": 5.142735004425049,
+      "learning_rate": 5.666666666666667e-05,
+      "loss": 0.5509,
+      "step": 1300
+    },
+    {
+      "epoch": 1.1349957007738607,
+      "grad_norm": 6.010578155517578,
+      "learning_rate": 5.6000000000000006e-05,
+      "loss": 0.5701,
+      "step": 1320
+    },
+    {
+      "epoch": 1.1521926053310405,
+      "grad_norm": 5.171779155731201,
+      "learning_rate": 5.5333333333333334e-05,
+      "loss": 0.5485,
+      "step": 1340
+    },
+    {
+      "epoch": 1.16938950988822,
+      "grad_norm": 5.51332426071167,
+      "learning_rate": 5.466666666666666e-05,
+      "loss": 0.5515,
+      "step": 1360
+    },
+    {
+      "epoch": 1.1865864144454,
+      "grad_norm": 6.2720947265625,
+      "learning_rate": 5.4000000000000005e-05,
+      "loss": 0.5503,
+      "step": 1380
+    },
+    {
+      "epoch": 1.2037833190025795,
+      "grad_norm": 6.498877048492432,
+      "learning_rate": 5.333333333333333e-05,
+      "loss": 0.525,
+      "step": 1400
+    },
+    {
+      "epoch": 1.2209802235597593,
+      "grad_norm": 5.129275321960449,
+      "learning_rate": 5.266666666666666e-05,
+      "loss": 0.4942,
+      "step": 1420
+    },
+    {
+      "epoch": 1.238177128116939,
+      "grad_norm": 4.145976543426514,
+      "learning_rate": 5.2000000000000004e-05,
+      "loss": 0.4961,
+      "step": 1440
+    },
+    {
+      "epoch": 1.2553740326741187,
+      "grad_norm": 6.678504467010498,
+      "learning_rate": 5.133333333333333e-05,
+      "loss": 0.5032,
+      "step": 1460
+    },
+    {
+      "epoch": 1.2725709372312983,
+      "grad_norm": 6.846457481384277,
+      "learning_rate": 5.0666666666666674e-05,
+      "loss": 0.5322,
+      "step": 1480
+    },
+    {
+      "epoch": 1.2897678417884781,
+      "grad_norm": 7.58906888961792,
+      "learning_rate": 5e-05,
+      "loss": 0.477,
+      "step": 1500
+    },
+    {
+      "epoch": 1.3069647463456577,
+      "grad_norm": 5.045712947845459,
+      "learning_rate": 4.933333333333334e-05,
+      "loss": 0.5139,
+      "step": 1520
+    },
+    {
+      "epoch": 1.3241616509028376,
+      "grad_norm": 4.599825859069824,
+      "learning_rate": 4.866666666666667e-05,
+      "loss": 0.4997,
+      "step": 1540
+    },
+    {
+      "epoch": 1.3413585554600171,
+      "grad_norm": 5.756386756896973,
+      "learning_rate": 4.8e-05,
+      "loss": 0.4841,
+      "step": 1560
+    },
+    {
+      "epoch": 1.358555460017197,
+      "grad_norm": 4.89516544342041,
+      "learning_rate": 4.7333333333333336e-05,
+      "loss": 0.4183,
+      "step": 1580
+    },
+    {
+      "epoch": 1.3757523645743766,
+      "grad_norm": 4.484691143035889,
+      "learning_rate": 4.666666666666667e-05,
+      "loss": 0.4185,
+      "step": 1600
+    },
+    {
+      "epoch": 1.3929492691315564,
+      "grad_norm": 5.203677654266357,
+      "learning_rate": 4.600000000000001e-05,
+      "loss": 0.4097,
+      "step": 1620
+    },
+    {
+      "epoch": 1.410146173688736,
+      "grad_norm": 6.922574996948242,
+      "learning_rate": 4.5333333333333335e-05,
+      "loss": 0.4367,
+      "step": 1640
+    },
+    {
+      "epoch": 1.4273430782459158,
+      "grad_norm": 5.618770122528076,
+      "learning_rate": 4.466666666666667e-05,
+      "loss": 0.4453,
+      "step": 1660
+    },
+    {
+      "epoch": 1.4445399828030954,
+      "grad_norm": 4.377410888671875,
+      "learning_rate": 4.4000000000000006e-05,
+      "loss": 0.4416,
+      "step": 1680
+    },
+    {
+      "epoch": 1.4617368873602752,
+      "grad_norm": 7.413736343383789,
+      "learning_rate": 4.3333333333333334e-05,
+      "loss": 0.4488,
+      "step": 1700
+    },
+    {
+      "epoch": 1.4789337919174548,
+      "grad_norm": 4.4008049964904785,
+      "learning_rate": 4.266666666666667e-05,
+      "loss": 0.4167,
+      "step": 1720
+    },
+    {
+      "epoch": 1.4961306964746346,
+      "grad_norm": 5.33242130279541,
+      "learning_rate": 4.2e-05,
+      "loss": 0.486,
+      "step": 1740
+    },
+    {
+      "epoch": 1.5133276010318144,
+      "grad_norm": 7.043882369995117,
+      "learning_rate": 4.133333333333333e-05,
+      "loss": 0.407,
+      "step": 1760
+    },
+    {
+      "epoch": 1.530524505588994,
+      "grad_norm": 6.068751335144043,
+      "learning_rate": 4.066666666666667e-05,
+      "loss": 0.3846,
+      "step": 1780
+    },
+    {
+      "epoch": 1.5477214101461736,
+      "grad_norm": 5.452756404876709,
+      "learning_rate": 4e-05,
+      "loss": 0.4327,
+      "step": 1800
+    },
+    {
+      "epoch": 1.5649183147033534,
+      "grad_norm": 3.541025161743164,
+      "learning_rate": 3.933333333333333e-05,
+      "loss": 0.3734,
+      "step": 1820
+    },
+    {
+      "epoch": 1.5821152192605332,
+      "grad_norm": 6.634982585906982,
+      "learning_rate": 3.866666666666667e-05,
+      "loss": 0.3679,
+      "step": 1840
+    },
+    {
+      "epoch": 1.5993121238177128,
+      "grad_norm": 3.89568829536438,
+      "learning_rate": 3.8e-05,
+      "loss": 0.372,
+      "step": 1860
+    },
+    {
+      "epoch": 1.6165090283748924,
+      "grad_norm": 6.27597188949585,
+      "learning_rate": 3.733333333333334e-05,
+      "loss": 0.3971,
+      "step": 1880
+    },
+    {
+      "epoch": 1.6337059329320722,
+      "grad_norm": 3.7944319248199463,
+      "learning_rate": 3.6666666666666666e-05,
+      "loss": 0.3854,
+      "step": 1900
+    },
+    {
+      "epoch": 1.650902837489252,
+      "grad_norm": 4.370260238647461,
+      "learning_rate": 3.6e-05,
+      "loss": 0.3609,
+      "step": 1920
+    },
+    {
+      "epoch": 1.6680997420464316,
+      "grad_norm": 5.236139297485352,
+      "learning_rate": 3.5333333333333336e-05,
+      "loss": 0.3932,
+      "step": 1940
+    },
+    {
+      "epoch": 1.6852966466036112,
+      "grad_norm": 4.053391456604004,
+      "learning_rate": 3.466666666666667e-05,
+      "loss": 0.4073,
+      "step": 1960
+    },
+    {
+      "epoch": 1.702493551160791,
+      "grad_norm": 4.105246543884277,
+      "learning_rate": 3.4000000000000007e-05,
+      "loss": 0.3533,
+      "step": 1980
+    },
+    {
+      "epoch": 1.7196904557179709,
+      "grad_norm": 4.842738151550293,
+      "learning_rate": 3.3333333333333335e-05,
+      "loss": 0.3529,
+      "step": 2000
+    },
+    {
+      "epoch": 1.7368873602751504,
+      "grad_norm": 6.755301475524902,
+      "learning_rate": 3.266666666666667e-05,
+      "loss": 0.4043,
+      "step": 2020
+    },
+    {
+      "epoch": 1.75408426483233,
+      "grad_norm": 5.9597578048706055,
+      "learning_rate": 3.2000000000000005e-05,
+      "loss": 0.3603,
+      "step": 2040
+    },
+    {
+      "epoch": 1.7712811693895099,
+      "grad_norm": 3.4720447063446045,
+      "learning_rate": 3.1333333333333334e-05,
+      "loss": 0.3367,
+      "step": 2060
+    },
+    {
+      "epoch": 1.7884780739466897,
+      "grad_norm": 3.3950035572052,
+      "learning_rate": 3.066666666666667e-05,
+      "loss": 0.3333,
+      "step": 2080
+    },
+    {
+      "epoch": 1.8056749785038693,
+      "grad_norm": 3.364729642868042,
+      "learning_rate": 3e-05,
+      "loss": 0.3416,
+      "step": 2100
+    },
+    {
+      "epoch": 1.8228718830610489,
+      "grad_norm": 5.35959005355835,
+      "learning_rate": 2.9333333333333336e-05,
+      "loss": 0.3487,
+      "step": 2120
+    },
+    {
+      "epoch": 1.8400687876182287,
+      "grad_norm": 4.672595500946045,
+      "learning_rate": 2.8666666666666668e-05,
+      "loss": 0.3225,
+      "step": 2140
+    },
+    {
+      "epoch": 1.8572656921754085,
+      "grad_norm": 4.4767327308654785,
+      "learning_rate": 2.8000000000000003e-05,
+      "loss": 0.3306,
+      "step": 2160
+    },
+    {
+      "epoch": 1.874462596732588,
+      "grad_norm": 5.055034637451172,
+      "learning_rate": 2.733333333333333e-05,
+      "loss": 0.392,
+      "step": 2180
+    },
+    {
+      "epoch": 1.8916595012897677,
+      "grad_norm": 4.375268936157227,
+      "learning_rate": 2.6666666666666667e-05,
+      "loss": 0.3205,
+      "step": 2200
+    },
+    {
+      "epoch": 1.9088564058469477,
+      "grad_norm": 5.488368988037109,
+      "learning_rate": 2.6000000000000002e-05,
+      "loss": 0.3111,
+      "step": 2220
+    },
+    {
+      "epoch": 1.9260533104041273,
+      "grad_norm": 4.405417442321777,
+      "learning_rate": 2.5333333333333337e-05,
+      "loss": 0.3014,
+      "step": 2240
+    },
+    {
+      "epoch": 1.943250214961307,
+      "grad_norm": 3.466012716293335,
+      "learning_rate": 2.466666666666667e-05,
+      "loss": 0.2952,
+      "step": 2260
+    },
+    {
+      "epoch": 1.9604471195184867,
+      "grad_norm": 3.681208848953247,
+      "learning_rate": 2.4e-05,
+      "loss": 0.3012,
+      "step": 2280
+    },
+    {
+      "epoch": 1.9776440240756665,
+      "grad_norm": 6.128725528717041,
+      "learning_rate": 2.3333333333333336e-05,
+      "loss": 0.3056,
+      "step": 2300
+    },
+    {
+      "epoch": 1.9948409286328461,
+      "grad_norm": 4.9947333335876465,
+      "learning_rate": 2.2666666666666668e-05,
+      "loss": 0.3026,
+      "step": 2320
+    },
+    {
+      "epoch": 2.0120378331900257,
+      "grad_norm": 4.199079990386963,
+      "learning_rate": 2.2033333333333335e-05,
+      "loss": 0.3554,
+      "step": 2340
+    },
+    {
+      "epoch": 2.0292347377472053,
+      "grad_norm": 3.375105381011963,
+      "learning_rate": 2.1366666666666667e-05,
+      "loss": 0.2795,
+      "step": 2360
+    },
+    {
+      "epoch": 2.0464316423043853,
+      "grad_norm": 4.369241237640381,
+      "learning_rate": 2.07e-05,
+      "loss": 0.2719,
+      "step": 2380
+    },
+    {
+      "epoch": 2.063628546861565,
+      "grad_norm": 3.1906027793884277,
+      "learning_rate": 2.0033333333333334e-05,
+      "loss": 0.2681,
+      "step": 2400
+    },
+    {
+      "epoch": 2.0808254514187445,
+      "grad_norm": 3.7493109703063965,
+      "learning_rate": 1.9366666666666665e-05,
+      "loss": 0.2765,
+      "step": 2420
+    },
+    {
+      "epoch": 2.098022355975924,
+      "grad_norm": 4.610039234161377,
+      "learning_rate": 1.87e-05,
+      "loss": 0.2734,
+      "step": 2440
+    },
+    {
+      "epoch": 2.115219260533104,
+      "grad_norm": 3.996819019317627,
+      "learning_rate": 1.8033333333333336e-05,
+      "loss": 0.2732,
+      "step": 2460
+    },
+    {
+      "epoch": 2.1324161650902838,
+      "grad_norm": 3.3538951873779297,
+      "learning_rate": 1.7366666666666668e-05,
+      "loss": 0.2796,
+      "step": 2480
+    },
+    {
+      "epoch": 2.1496130696474633,
+      "grad_norm": 2.914961814880371,
+      "learning_rate": 1.6700000000000003e-05,
+      "loss": 0.2572,
+      "step": 2500
+    },
+    {
+      "epoch": 2.166809974204643,
+      "grad_norm": 3.70522141456604,
+      "learning_rate": 1.6033333333333335e-05,
+      "loss": 0.2914,
+      "step": 2520
+    },
+    {
+      "epoch": 2.184006878761823,
+      "grad_norm": 2.8733532428741455,
+      "learning_rate": 1.536666666666667e-05,
+      "loss": 0.2577,
+      "step": 2540
+    },
+    {
+      "epoch": 2.2012037833190026,
+      "grad_norm": 5.222810745239258,
+      "learning_rate": 1.47e-05,
+      "loss": 0.2805,
+      "step": 2560
+    },
+    {
+      "epoch": 2.218400687876182,
+      "grad_norm": 3.2695486545562744,
+      "learning_rate": 1.4033333333333335e-05,
+      "loss": 0.2716,
+      "step": 2580
+    },
+    {
+      "epoch": 2.235597592433362,
+      "grad_norm": 4.164505958557129,
+      "learning_rate": 1.3366666666666667e-05,
+      "loss": 0.2618,
+      "step": 2600
+    },
+    {
+      "epoch": 2.252794496990542,
+      "grad_norm": 3.265292167663574,
+      "learning_rate": 1.27e-05,
+      "loss": 0.2593,
+      "step": 2620
+    },
+    {
+      "epoch": 2.2699914015477214,
+      "grad_norm": 3.0533978939056396,
+      "learning_rate": 1.2033333333333334e-05,
+      "loss": 0.2661,
+      "step": 2640
+    },
+    {
+      "epoch": 2.287188306104901,
+      "grad_norm": 3.9265048503875732,
+      "learning_rate": 1.1366666666666667e-05,
+      "loss": 0.2634,
+      "step": 2660
+    },
+    {
+      "epoch": 2.304385210662081,
+      "grad_norm": 3.1227498054504395,
+      "learning_rate": 1.0700000000000001e-05,
+      "loss": 0.2477,
+      "step": 2680
+    },
+    {
+      "epoch": 2.3215821152192606,
+      "grad_norm": 2.6049001216888428,
+      "learning_rate": 1.0033333333333333e-05,
+      "loss": 0.2579,
+      "step": 2700
+    },
+    {
+      "epoch": 2.33877901977644,
+      "grad_norm": 3.21602201461792,
+      "learning_rate": 9.366666666666666e-06,
+      "loss": 0.2588,
+      "step": 2720
+    },
+    {
+      "epoch": 2.35597592433362,
+      "grad_norm": 2.96010160446167,
+      "learning_rate": 8.7e-06,
+      "loss": 0.2463,
+      "step": 2740
+    },
+    {
+      "epoch": 2.3731728288908,
+      "grad_norm": 2.7661256790161133,
+      "learning_rate": 8.033333333333335e-06,
+      "loss": 0.2422,
+      "step": 2760
+    },
+    {
+      "epoch": 2.3903697334479794,
+      "grad_norm": 3.1497080326080322,
+      "learning_rate": 7.3666666666666676e-06,
+      "loss": 0.2459,
+      "step": 2780
+    },
+    {
+      "epoch": 2.407566638005159,
+      "grad_norm": 3.7932960987091064,
+      "learning_rate": 6.700000000000001e-06,
+      "loss": 0.2375,
+      "step": 2800
+    },
+    {
+      "epoch": 2.4247635425623386,
+      "grad_norm": 2.217522382736206,
+      "learning_rate": 6.033333333333334e-06,
+      "loss": 0.2356,
+      "step": 2820
+    },
+    {
+      "epoch": 2.4419604471195187,
+      "grad_norm": 2.5312998294830322,
+      "learning_rate": 5.366666666666667e-06,
+      "loss": 0.2359,
+      "step": 2840
+    },
+    {
+      "epoch": 2.4591573516766982,
+      "grad_norm": 3.2574574947357178,
+      "learning_rate": 4.7e-06,
+      "loss": 0.2439,
+      "step": 2860
+    },
+    {
+      "epoch": 2.476354256233878,
+      "grad_norm": 3.601336717605591,
+      "learning_rate": 4.033333333333333e-06,
+      "loss": 0.2566,
+      "step": 2880
+    },
+    {
+      "epoch": 2.4935511607910574,
+      "grad_norm": 3.639084577560425,
+      "learning_rate": 3.3666666666666665e-06,
+      "loss": 0.2612,
+      "step": 2900
+    },
+    {
+      "epoch": 2.5107480653482375,
+      "grad_norm": 3.7567994594573975,
+      "learning_rate": 2.7e-06,
+      "loss": 0.2439,
+      "step": 2920
+    },
+    {
+      "epoch": 2.527944969905417,
+      "grad_norm": 3.882097005844116,
+      "learning_rate": 2.033333333333333e-06,
+      "loss": 0.2567,
+      "step": 2940
+    },
+    {
+      "epoch": 2.5451418744625967,
+      "grad_norm": 3.611342430114746,
+      "learning_rate": 1.3666666666666668e-06,
+      "loss": 0.2582,
+      "step": 2960
+    },
+    {
+      "epoch": 2.5623387790197762,
+      "grad_norm": 3.290154457092285,
+      "learning_rate": 7.000000000000001e-07,
+      "loss": 0.2398,
+      "step": 2980
+    },
+    {
+      "epoch": 2.5795356835769563,
+      "grad_norm": 3.562819719314575,
+      "learning_rate": 3.3333333333333334e-08,
+      "loss": 0.2522,
+      "step": 3000
+    }
+  ],
+  "logging_steps": 20,
+  "max_steps": 3000,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.5195758126481408e+17,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

outputs/checkpoint-3000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:171c89f8ecd1388fc79e0fbedd3775495c90abd0688b87c0090ac48cd2e89673
+size 5048

outputs/checkpoint-500/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+library_name: peft
+base_model: beomi/KoAlpaca-Polyglot-5.8B
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.10.1.dev0

outputs/checkpoint-500/adapter_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "beomi/KoAlpaca-Polyglot-5.8B",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "query_key_value"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

outputs/checkpoint-500/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c7b031c7609e5755dcc7056d5c6b2dca7b550f94685d2045804e085e3e5acf7
+size 14688200

outputs/checkpoint-500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c73a33b28554fb907899fb435d2eab64fc29a016c05aabef992fe56358e53e9f
+size 29407610

outputs/checkpoint-500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1055de0c37662d81d1630d00c96187b856dd6b76fc95be222c5d5873a148015c
+size 14244

outputs/checkpoint-500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:550a2f153663d8a0c54449bb5360e027e7367cf76d44cbcf1b726829b9919970
+size 1064