goldsounds committed on Mar 14

Commit

7111b87

•

1 Parent(s): 0fb5b8d

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

README.md +248 -0
adapter_config.json +32 -0
adapter_model.safetensors +3 -0
all_results.json +13 -0
checkpoint-1000/README.md +204 -0
checkpoint-1000/adapter_config.json +32 -0
checkpoint-1000/adapter_model.safetensors +3 -0
checkpoint-1000/merges.txt +0 -0
checkpoint-1000/optimizer.pt +3 -0
checkpoint-1000/rng_state.pth +3 -0
checkpoint-1000/scheduler.pt +3 -0
checkpoint-1000/special_tokens_map.json +64 -0
checkpoint-1000/tokenizer_config.json +361 -0
checkpoint-1000/trainer_state.json +1501 -0
checkpoint-1000/training_args.bin +3 -0
checkpoint-1000/vocab.json +0 -0
checkpoint-10000/README.md +204 -0
checkpoint-10000/adapter_config.json +32 -0
checkpoint-10000/adapter_model.safetensors +3 -0
checkpoint-10000/merges.txt +0 -0
checkpoint-10000/optimizer.pt +3 -0
checkpoint-10000/rng_state.pth +3 -0
checkpoint-10000/scheduler.pt +3 -0
checkpoint-10000/special_tokens_map.json +64 -0
checkpoint-10000/tokenizer_config.json +361 -0
checkpoint-10000/trainer_state.json +0 -0
checkpoint-10000/training_args.bin +3 -0
checkpoint-10000/vocab.json +0 -0
checkpoint-10500/README.md +204 -0
checkpoint-10500/adapter_config.json +32 -0
checkpoint-10500/adapter_model.safetensors +3 -0
checkpoint-10500/merges.txt +0 -0
checkpoint-10500/optimizer.pt +3 -0
checkpoint-10500/rng_state.pth +3 -0
checkpoint-10500/scheduler.pt +3 -0
checkpoint-10500/special_tokens_map.json +64 -0
checkpoint-10500/tokenizer_config.json +361 -0
checkpoint-10500/trainer_state.json +0 -0
checkpoint-10500/training_args.bin +3 -0
checkpoint-10500/vocab.json +0 -0
checkpoint-11000/README.md +204 -0
checkpoint-11000/adapter_config.json +32 -0
checkpoint-11000/adapter_model.safetensors +3 -0
checkpoint-11000/merges.txt +0 -0
checkpoint-11000/optimizer.pt +3 -0
checkpoint-11000/rng_state.pth +3 -0
checkpoint-11000/scheduler.pt +3 -0
checkpoint-11000/special_tokens_map.json +64 -0
checkpoint-11000/tokenizer_config.json +361 -0
checkpoint-11000/trainer_state.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,248 @@

+---
+license: other
+library_name: peft
+tags:
+- llama-factory
+- lora
+- generated_from_trainer
+base_model: bigcode/starcoder2-7b
+model-index:
+- name: sft
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# sft
+This model is a fine-tuned version of [bigcode/starcoder2-7b](https://huggingface.co/bigcode/starcoder2-7b) on the starcoder_jetpack dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.6761
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-05
+- train_batch_size: 1
+- eval_batch_size: 1
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- num_epochs: 3.0
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch | Step  | Validation Loss |
+|:-------------:|:-----:|:-----:|:---------------:|
+| 1.2469        | 0.02  | 100   | 0.8479          |
+| 1.3584        | 0.03  | 200   | 0.8368          |
+| 1.0651        | 0.05  | 300   | 0.8281          |
+| 0.9209        | 0.06  | 400   | 0.8185          |
+| 0.8306        | 0.08  | 500   | 0.8132          |
+| 0.9175        | 0.1   | 600   | 0.8064          |
+| 0.8157        | 0.11  | 700   | 0.8023          |
+| 0.9469        | 0.13  | 800   | 0.7996          |
+| 0.8872        | 0.14  | 900   | 0.7955          |
+| 0.8842        | 0.16  | 1000  | 0.7913          |
+| 0.7909        | 0.18  | 1100  | 0.7863          |
+| 0.8196        | 0.19  | 1200  | 0.7844          |
+| 0.9341        | 0.21  | 1300  | 0.7825          |
+| 0.8801        | 0.22  | 1400  | 0.7787          |
+| 0.9109        | 0.24  | 1500  | 0.7777          |
+| 0.8964        | 0.26  | 1600  | 0.7759          |
+| 0.9265        | 0.27  | 1700  | 0.7742          |
+| 0.8632        | 0.29  | 1800  | 0.7699          |
+| 1.0081        | 0.3   | 1900  | 0.7693          |
+| 0.7651        | 0.32  | 2000  | 0.7664          |
+| 1.0037        | 0.34  | 2100  | 0.7654          |
+| 0.8768        | 0.35  | 2200  | 0.7642          |
+| 0.8052        | 0.37  | 2300  | 0.7618          |
+| 0.7271        | 0.38  | 2400  | 0.7595          |
+| 0.9615        | 0.4   | 2500  | 0.7582          |
+| 0.8284        | 0.42  | 2600  | 0.7555          |
+| 0.8631        | 0.43  | 2700  | 0.7540          |
+| 1.0507        | 0.45  | 2800  | 0.7518          |
+| 0.8247        | 0.46  | 2900  | 0.7512          |
+| 0.9835        | 0.48  | 3000  | 0.7496          |
+| 0.8407        | 0.49  | 3100  | 0.7496          |
+| 0.7417        | 0.51  | 3200  | 0.7467          |
+| 0.7449        | 0.53  | 3300  | 0.7473          |
+| 0.8562        | 0.54  | 3400  | 0.7437          |
+| 0.9222        | 0.56  | 3500  | 0.7429          |
+| 0.9242        | 0.57  | 3600  | 0.7413          |
+| 0.8092        | 0.59  | 3700  | 0.7403          |
+| 0.7279        | 0.61  | 3800  | 0.7394          |
+| 0.7774        | 0.62  | 3900  | 0.7385          |
+| 0.8942        | 0.64  | 4000  | 0.7364          |
+| 0.9286        | 0.65  | 4100  | 0.7348          |
+| 0.7703        | 0.67  | 4200  | 0.7354          |
+| 0.8322        | 0.69  | 4300  | 0.7330          |
+| 0.9851        | 0.7   | 4400  | 0.7324          |
+| 0.8712        | 0.72  | 4500  | 0.7317          |
+| 0.7871        | 0.73  | 4600  | 0.7310          |
+| 0.7156        | 0.75  | 4700  | 0.7284          |
+| 0.7856        | 0.77  | 4800  | 0.7277          |
+| 0.7906        | 0.78  | 4900  | 0.7255          |
+| 0.7917        | 0.8   | 5000  | 0.7250          |
+| 0.6395        | 0.81  | 5100  | 0.7237          |
+| 0.7567        | 0.83  | 5200  | 0.7232          |
+| 0.8551        | 0.85  | 5300  | 0.7220          |
+| 0.7392        | 0.86  | 5400  | 0.7226          |
+| 0.9312        | 0.88  | 5500  | 0.7205          |
+| 0.8323        | 0.89  | 5600  | 0.7196          |
+| 0.7312        | 0.91  | 5700  | 0.7197          |
+| 1.0           | 0.93  | 5800  | 0.7182          |
+| 0.6164        | 0.94  | 5900  | 0.7177          |
+| 0.7484        | 0.96  | 6000  | 0.7147          |
+| 0.7924        | 0.97  | 6100  | 0.7144          |
+| 0.9389        | 0.99  | 6200  | 0.7145          |
+| 0.7108        | 1.01  | 6300  | 0.7136          |
+| 0.8076        | 1.02  | 6400  | 0.7154          |
+| 0.7232        | 1.04  | 6500  | 0.7147          |
+| 0.6456        | 1.05  | 6600  | 0.7122          |
+| 0.5862        | 1.07  | 6700  | 0.7133          |
+| 0.6935        | 1.09  | 6800  | 0.7112          |
+| 0.7522        | 1.1   | 6900  | 0.7103          |
+| 1.0525        | 1.12  | 7000  | 0.7106          |
+| 0.8285        | 1.13  | 7100  | 0.7099          |
+| 0.6116        | 1.15  | 7200  | 0.7079          |
+| 0.5617        | 1.17  | 7300  | 0.7087          |
+| 0.6514        | 1.18  | 7400  | 0.7072          |
+| 0.6729        | 1.2   | 7500  | 0.7052          |
+| 0.6401        | 1.21  | 7600  | 0.7055          |
+| 0.8089        | 1.23  | 7700  | 0.7052          |
+| 0.8166        | 1.25  | 7800  | 0.7041          |
+| 0.8685        | 1.26  | 7900  | 0.7026          |
+| 0.6945        | 1.28  | 8000  | 0.7043          |
+| 0.6955        | 1.29  | 8100  | 0.7010          |
+| 0.734         | 1.31  | 8200  | 0.7022          |
+| 0.5586        | 1.32  | 8300  | 0.7017          |
+| 0.7299        | 1.34  | 8400  | 0.6999          |
+| 1.089         | 1.36  | 8500  | 0.6994          |
+| 0.5733        | 1.37  | 8600  | 0.6994          |
+| 0.5409        | 1.39  | 8700  | 0.6987          |
+| 0.8848        | 1.4   | 8800  | 0.6976          |
+| 0.5739        | 1.42  | 8900  | 0.6971          |
+| 0.728         | 1.44  | 9000  | 0.6963          |
+| 0.7503        | 1.45  | 9100  | 0.6953          |
+| 0.706         | 1.47  | 9200  | 0.6951          |
+| 0.7926        | 1.48  | 9300  | 0.6945          |
+| 0.6019        | 1.5   | 9400  | 0.6945          |
+| 0.6707        | 1.52  | 9500  | 0.6943          |
+| 0.7269        | 1.53  | 9600  | 0.6940          |
+| 0.7216        | 1.55  | 9700  | 0.6923          |
+| 0.6394        | 1.56  | 9800  | 0.6920          |
+| 0.7608        | 1.58  | 9900  | 0.6909          |
+| 1.034         | 1.6   | 10000 | 0.6908          |
+| 0.7934        | 1.61  | 10100 | 0.6892          |
+| 0.627         | 1.63  | 10200 | 0.6902          |
+| 0.5849        | 1.64  | 10300 | 0.6897          |
+| 0.7257        | 1.66  | 10400 | 0.6889          |
+| 0.8931        | 1.68  | 10500 | 0.6890          |
+| 0.6831        | 1.69  | 10600 | 0.6875          |
+| 0.4995        | 1.71  | 10700 | 0.6879          |
+| 0.757         | 1.72  | 10800 | 0.6873          |
+| 0.4664        | 1.74  | 10900 | 0.6876          |
+| 0.78          | 1.76  | 11000 | 0.6865          |
+| 0.5564        | 1.77  | 11100 | 0.6865          |
+| 0.7858        | 1.79  | 11200 | 0.6858          |
+| 0.6989        | 1.8   | 11300 | 0.6851          |
+| 0.705         | 1.82  | 11400 | 0.6841          |
+| 0.5795        | 1.84  | 11500 | 0.6842          |
+| 0.6989        | 1.85  | 11600 | 0.6837          |
+| 0.6877        | 1.87  | 11700 | 0.6838          |
+| 0.6484        | 1.88  | 11800 | 0.6835          |
+| 0.8525        | 1.9   | 11900 | 0.6832          |
+| 0.7547        | 1.92  | 12000 | 0.6823          |
+| 0.8118        | 1.93  | 12100 | 0.6819          |
+| 0.8859        | 1.95  | 12200 | 0.6823          |
+| 0.738         | 1.96  | 12300 | 0.6811          |
+| 0.7051        | 1.98  | 12400 | 0.6816          |
+| 0.5598        | 2.0   | 12500 | 0.6802          |
+| 0.6194        | 2.01  | 12600 | 0.6812          |
+| 0.7101        | 2.03  | 12700 | 0.6817          |
+| 0.7027        | 2.04  | 12800 | 0.6815          |
+| 0.9432        | 2.06  | 12900 | 0.6810          |
+| 0.5931        | 2.08  | 13000 | 0.6817          |
+| 0.5412        | 2.09  | 13100 | 0.6810          |
+| 0.6237        | 2.11  | 13200 | 0.6815          |
+| 0.5871        | 2.12  | 13300 | 0.6812          |
+| 0.8331        | 2.14  | 13400 | 0.6817          |
+| 0.4528        | 2.15  | 13500 | 0.6812          |
+| 0.6292        | 2.17  | 13600 | 0.6814          |
+| 0.6219        | 2.19  | 13700 | 0.6800          |
+| 0.6281        | 2.2   | 13800 | 0.6798          |
+| 0.6949        | 2.22  | 13900 | 0.6803          |
+| 0.6701        | 2.23  | 14000 | 0.6791          |
+| 0.6467        | 2.25  | 14100 | 0.6795          |
+| 0.6579        | 2.27  | 14200 | 0.6800          |
+| 0.5978        | 2.28  | 14300 | 0.6802          |
+| 0.7032        | 2.3   | 14400 | 0.6793          |
+| 0.6347        | 2.31  | 14500 | 0.6787          |
+| 0.9034        | 2.33  | 14600 | 0.6788          |
+| 0.6166        | 2.35  | 14700 | 0.6781          |
+| 0.7327        | 2.36  | 14800 | 0.6786          |
+| 0.7159        | 2.38  | 14900 | 0.6777          |
+| 0.6283        | 2.39  | 15000 | 0.6779          |
+| 0.6113        | 2.41  | 15100 | 0.6776          |
+| 0.5951        | 2.43  | 15200 | 0.6781          |
+| 0.6747        | 2.44  | 15300 | 0.6777          |
+| 0.5935        | 2.46  | 15400 | 0.6779          |
+| 0.6435        | 2.47  | 15500 | 0.6776          |
+| 0.637         | 2.49  | 15600 | 0.6772          |
+| 0.4617        | 2.51  | 15700 | 0.6774          |
+| 0.7937        | 2.52  | 15800 | 0.6771          |
+| 0.7187        | 2.54  | 15900 | 0.6768          |
+| 0.657         | 2.55  | 16000 | 0.6767          |
+| 0.8606        | 2.57  | 16100 | 0.6767          |
+| 0.4392        | 2.59  | 16200 | 0.6768          |
+| 0.5675        | 2.6   | 16300 | 0.6769          |
+| 0.6454        | 2.62  | 16400 | 0.6768          |
+| 0.5787        | 2.63  | 16500 | 0.6767          |
+| 0.6111        | 2.65  | 16600 | 0.6766          |
+| 0.6106        | 2.67  | 16700 | 0.6767          |
+| 0.5947        | 2.68  | 16800 | 0.6763          |
+| 0.5576        | 2.7   | 16900 | 0.6763          |
+| 0.659         | 2.71  | 17000 | 0.6762          |
+| 0.787         | 2.73  | 17100 | 0.6761          |
+| 0.5503        | 2.75  | 17200 | 0.6760          |
+| 0.5558        | 2.76  | 17300 | 0.6760          |
+| 0.6324        | 2.78  | 17400 | 0.6761          |
+| 0.5846        | 2.79  | 17500 | 0.6761          |
+| 0.9542        | 2.81  | 17600 | 0.6760          |
+| 0.5755        | 2.83  | 17700 | 0.6761          |
+| 0.7841        | 2.84  | 17800 | 0.6761          |
+| 0.5662        | 2.86  | 17900 | 0.6761          |
+| 0.8085        | 2.87  | 18000 | 0.6761          |
+| 0.7389        | 2.89  | 18100 | 0.6761          |
+| 0.736         | 2.91  | 18200 | 0.6761          |
+| 0.5604        | 2.92  | 18300 | 0.6761          |
+| 0.6156        | 2.94  | 18400 | 0.6761          |
+| 0.5473        | 2.95  | 18500 | 0.6761          |
+| 0.7286        | 2.97  | 18600 | 0.6761          |
+| 0.5932        | 2.98  | 18700 | 0.6761          |
+### Framework versions
+- PEFT 0.9.0
+- Transformers 4.39.0.dev0
+- Pytorch 2.1.0+cu121
+- Datasets 2.18.0
+- Tokenizers 0.15.2

adapter_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder2-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.15,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_fc",
+    "o_proj",
+    "q_proj",
+    "v_proj",
+    "k_proj",
+    "c_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bc16c87cc4e55e8b52d845df95957993632cd645300dde18af970b85430a49d3
+size 306235552

all_results.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+    "epoch": 3.0,
+    "eval_loss": 0.6760696172714233,
+    "eval_runtime": 96.721,
+    "eval_samples_per_second": 7.206,
+    "eval_steps_per_second": 7.206,
+    "perplexity": 1.9661348634697817,
+    "total_flos": 8.111041940658586e+17,
+    "train_loss": 0.7353765367668783,
+    "train_runtime": 28019.6609,
+    "train_samples_per_second": 0.671,
+    "train_steps_per_second": 0.671
+}

checkpoint-1000/README.md ADDED Viewed

	@@ -0,0 +1,204 @@

+---
+library_name: peft
+base_model: bigcode/starcoder2-7b
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.9.0

checkpoint-1000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder2-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.15,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_fc",
+    "o_proj",
+    "q_proj",
+    "v_proj",
+    "k_proj",
+    "c_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-1000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3cf012786c99967b1faabcd02528ccb46bd8e1dda55139ee8c61333a072a9f7c
+size 306235552

checkpoint-1000/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-1000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:009f146f4255b0a1b95a91fd2fb1bf70dc089a13ad98bd2f64673ea1217bfca0
+size 612692114

checkpoint-1000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:84fe6feed7b7aac04ce2bfaadc40072bafdaf974be4de840016f8ea7305b750a
+size 14244

checkpoint-1000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b83b0d57be939973ba63fb96361823572996faa011b323939493bdbb88e03d73
+size 1064

checkpoint-1000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<fim_prefix>",
+    "<fim_middle>",
+    "<fim_suffix>",
+    "<fim_pad>",
+    "<repo_name>",
+    "<file_sep>",
+    "<issue_start>",
+    "<issue_comment>",
+    "<issue_closed>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<jupyter_script>",
+    "<empty_output>",
+    "<code_to_intermediate>",
+    "<intermediate_to_code>",
+    "<pr>",
+    "<pr_status>",
+    "<pr_is_merged>",
+    "<pr_base>",
+    "<pr_file>",
+    "<pr_base_code>",
+    "<pr_diff>",
+    "<pr_diff_hunk>",
+    "<pr_comment>",
+    "<pr_event_id>",
+    "<pr_review>",
+    "<pr_review_state>",
+    "<pr_review_comment>",
+    "<pr_in_reply_to_review_id>",
+    "<pr_in_reply_to_comment_id>",
+    "<pr_diff_hunk_comment_line>",
+    "<NAME>",
+    "<EMAIL>",
+    "<KEY>",
+    "<PASSWORD>"
+  ],
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-1000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,361 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<fim_prefix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<fim_middle>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<fim_suffix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<fim_pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<repo_name>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<file_sep>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<jupyter_script>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<empty_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<code_to_intermediate>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "<intermediate_to_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "<pr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "19": {
+      "content": "<pr_status>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "20": {
+      "content": "<pr_is_merged>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "21": {
+      "content": "<pr_base>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "22": {
+      "content": "<pr_file>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "23": {
+      "content": "<pr_base_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "24": {
+      "content": "<pr_diff>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "25": {
+      "content": "<pr_diff_hunk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "26": {
+      "content": "<pr_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "27": {
+      "content": "<pr_event_id>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "28": {
+      "content": "<pr_review>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "29": {
+      "content": "<pr_review_state>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30": {
+      "content": "<pr_review_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "31": {
+      "content": "<pr_in_reply_to_review_id>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32": {
+      "content": "<pr_in_reply_to_comment_id>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "33": {
+      "content": "<pr_diff_hunk_comment_line>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "34": {
+      "content": "<NAME>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "35": {
+      "content": "<EMAIL>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "36": {
+      "content": "<KEY>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "37": {
+      "content": "<PASSWORD>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<fim_prefix>",
+    "<fim_middle>",
+    "<fim_suffix>",
+    "<fim_pad>",
+    "<repo_name>",
+    "<file_sep>",
+    "<issue_start>",
+    "<issue_comment>",
+    "<issue_closed>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<jupyter_script>",
+    "<empty_output>",
+    "<code_to_intermediate>",
+    "<intermediate_to_code>",
+    "<pr>",
+    "<pr_status>",
+    "<pr_is_merged>",
+    "<pr_base>",
+    "<pr_file>",
+    "<pr_base_code>",
+    "<pr_diff>",
+    "<pr_diff_hunk>",
+    "<pr_comment>",
+    "<pr_event_id>",
+    "<pr_review>",
+    "<pr_review_state>",
+    "<pr_review_comment>",
+    "<pr_in_reply_to_review_id>",
+    "<pr_in_reply_to_comment_id>",
+    "<pr_diff_hunk_comment_line>",
+    "<NAME>",
+    "<EMAIL>",
+    "<KEY>",
+    "<PASSWORD>"
+  ],
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>",
+  "vocab_size": 49152
+}

checkpoint-1000/trainer_state.json ADDED Viewed

	@@ -0,0 +1,1501 @@

+{
+  "best_metric": 0.7912728190422058,
+  "best_model_checkpoint": "saves/starcoder2-7b/lora/sft/checkpoint-1000",
+  "epoch": 0.1596169193934557,
+  "eval_steps": 100,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0,
+      "grad_norm": 0.628385603427887,
+      "learning_rate": 4.999999126897802e-05,
+      "loss": 1.2582,
+      "step": 5
+    },
+    {
+      "epoch": 0.0,
+      "grad_norm": 1.0855119228363037,
+      "learning_rate": 4.999996507591817e-05,
+      "loss": 0.801,
+      "step": 10
+    },
+    {
+      "epoch": 0.0,
+      "grad_norm": 1.5689586400985718,
+      "learning_rate": 4.9999921420838745e-05,
+      "loss": 1.067,
+      "step": 15
+    },
+    {
+      "epoch": 0.0,
+      "grad_norm": 2.0851330757141113,
+      "learning_rate": 4.999986030377024e-05,
+      "loss": 1.2953,
+      "step": 20
+    },
+    {
+      "epoch": 0.0,
+      "grad_norm": 1.397479772567749,
+      "learning_rate": 4.999978172475535e-05,
+      "loss": 0.9826,
+      "step": 25
+    },
+    {
+      "epoch": 0.0,
+      "grad_norm": 1.344118595123291,
+      "learning_rate": 4.9999685683848954e-05,
+      "loss": 0.9485,
+      "step": 30
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 1.158163070678711,
+      "learning_rate": 4.9999596278606616e-05,
+      "loss": 0.8103,
+      "step": 35
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 1.602233648300171,
+      "learning_rate": 4.999946880647276e-05,
+      "loss": 0.8648,
+      "step": 40
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 1.557242751121521,
+      "learning_rate": 4.999932387266596e-05,
+      "loss": 1.0198,
+      "step": 45
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 1.36068856716156,
+      "learning_rate": 4.999916147728746e-05,
+      "loss": 0.9367,
+      "step": 50
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 1.3263639211654663,
+      "learning_rate": 4.999898162045068e-05,
+      "loss": 0.9695,
+      "step": 55
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 1.333601474761963,
+      "learning_rate": 4.999878430228126e-05,
+      "loss": 1.1509,
+      "step": 60
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 1.4753800630569458,
+      "learning_rate": 4.999856952291702e-05,
+      "loss": 1.1461,
+      "step": 65
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 1.5096240043640137,
+      "learning_rate": 4.9998337282507965e-05,
+      "loss": 1.1722,
+      "step": 70
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 1.189892053604126,
+      "learning_rate": 4.999808758121633e-05,
+      "loss": 1.1834,
+      "step": 75
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 0.9292634725570679,
+      "learning_rate": 4.999782041921651e-05,
+      "loss": 0.9498,
+      "step": 80
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 2.1775777339935303,
+      "learning_rate": 4.9997535796695134e-05,
+      "loss": 0.9346,
+      "step": 85
+    },
+    {
+      "epoch": 0.01,
+      "grad_norm": 1.6854296922683716,
+      "learning_rate": 4.999723371385099e-05,
+      "loss": 1.119,
+      "step": 90
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.4571490287780762,
+      "learning_rate": 4.999691417089507e-05,
+      "loss": 0.8671,
+      "step": 95
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.277044653892517,
+      "learning_rate": 4.999657716805059e-05,
+      "loss": 1.2469,
+      "step": 100
+    },
+    {
+      "epoch": 0.02,
+      "eval_loss": 0.8478816747665405,
+      "eval_runtime": 96.2736,
+      "eval_samples_per_second": 7.24,
+      "eval_steps_per_second": 7.24,
+      "step": 100
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 0.6687743067741394,
+      "learning_rate": 4.9996222705552933e-05,
+      "loss": 0.735,
+      "step": 105
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.3488354682922363,
+      "learning_rate": 4.9995850783649665e-05,
+      "loss": 0.8344,
+      "step": 110
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.1043323278427124,
+      "learning_rate": 4.9995461402600593e-05,
+      "loss": 0.8254,
+      "step": 115
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 0.9382895827293396,
+      "learning_rate": 4.9995054562677684e-05,
+      "loss": 0.9179,
+      "step": 120
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.2824612855911255,
+      "learning_rate": 4.9994630264165107e-05,
+      "loss": 0.8663,
+      "step": 125
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.0491925477981567,
+      "learning_rate": 4.999418850735923e-05,
+      "loss": 0.9247,
+      "step": 130
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.3642233610153198,
+      "learning_rate": 4.99937292925686e-05,
+      "loss": 0.8253,
+      "step": 135
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 3.747757911682129,
+      "learning_rate": 4.9993252620113976e-05,
+      "loss": 1.0245,
+      "step": 140
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.299494981765747,
+      "learning_rate": 4.999275849032832e-05,
+      "loss": 0.8723,
+      "step": 145
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 1.7195830345153809,
+      "learning_rate": 4.999224690355675e-05,
+      "loss": 1.0524,
+      "step": 150
+    },
+    {
+      "epoch": 0.02,
+      "grad_norm": 0.9922987222671509,
+      "learning_rate": 4.9991717860156616e-05,
+      "loss": 0.9502,
+      "step": 155
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.0577458143234253,
+      "learning_rate": 4.9991171360497437e-05,
+      "loss": 1.0115,
+      "step": 160
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.0001195669174194,
+      "learning_rate": 4.999060740496093e-05,
+      "loss": 1.1999,
+      "step": 165
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.2456804513931274,
+      "learning_rate": 4.999002599394102e-05,
+      "loss": 0.8882,
+      "step": 170
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.0445325374603271,
+      "learning_rate": 4.9989427127843814e-05,
+      "loss": 1.0615,
+      "step": 175
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.2410887479782104,
+      "learning_rate": 4.9988810807087584e-05,
+      "loss": 1.1068,
+      "step": 180
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 0.8935971260070801,
+      "learning_rate": 4.998817703210285e-05,
+      "loss": 0.6683,
+      "step": 185
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.1614488363265991,
+      "learning_rate": 4.9987525803332265e-05,
+      "loss": 0.7446,
+      "step": 190
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 0.9392004013061523,
+      "learning_rate": 4.998685712123072e-05,
+      "loss": 0.7397,
+      "step": 195
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.0314444303512573,
+      "learning_rate": 4.9986170986265266e-05,
+      "loss": 1.3584,
+      "step": 200
+    },
+    {
+      "epoch": 0.03,
+      "eval_loss": 0.8368077278137207,
+      "eval_runtime": 96.5262,
+      "eval_samples_per_second": 7.221,
+      "eval_steps_per_second": 7.221,
+      "step": 200
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 0.8964811563491821,
+      "learning_rate": 4.998546739891516e-05,
+      "loss": 0.9546,
+      "step": 205
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.0679796934127808,
+      "learning_rate": 4.998474635967185e-05,
+      "loss": 0.864,
+      "step": 210
+    },
+    {
+      "epoch": 0.03,
+      "grad_norm": 1.2340985536575317,
+      "learning_rate": 4.998400786903896e-05,
+      "loss": 0.885,
+      "step": 215
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 1.7219617366790771,
+      "learning_rate": 4.9983251927532315e-05,
+      "loss": 1.1069,
+      "step": 220
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 1.1480705738067627,
+      "learning_rate": 4.9982478535679924e-05,
+      "loss": 1.0416,
+      "step": 225
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 1.515589714050293,
+      "learning_rate": 4.9981687694021996e-05,
+      "loss": 1.1844,
+      "step": 230
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 1.6687963008880615,
+      "learning_rate": 4.998087940311091e-05,
+      "loss": 0.8664,
+      "step": 235
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 1.9256645441055298,
+      "learning_rate": 4.998005366351125e-05,
+      "loss": 1.0125,
+      "step": 240
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 1.2500052452087402,
+      "learning_rate": 4.997921047579978e-05,
+      "loss": 1.1374,
+      "step": 245
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 1.0543216466903687,
+      "learning_rate": 4.9978349840565434e-05,
+      "loss": 0.8502,
+      "step": 250
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 1.3009012937545776,
+      "learning_rate": 4.997747175840937e-05,
+      "loss": 1.0357,
+      "step": 255
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 0.8456661105155945,
+      "learning_rate": 4.997657622994491e-05,
+      "loss": 0.6883,
+      "step": 260
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 0.5856515765190125,
+      "learning_rate": 4.9975663255797555e-05,
+      "loss": 0.7656,
+      "step": 265
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 0.973818302154541,
+      "learning_rate": 4.997473283660501e-05,
+      "loss": 0.823,
+      "step": 270
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 0.9960187673568726,
+      "learning_rate": 4.997378497301715e-05,
+      "loss": 0.8726,
+      "step": 275
+    },
+    {
+      "epoch": 0.04,
+      "grad_norm": 1.2900679111480713,
+      "learning_rate": 4.997281966569604e-05,
+      "loss": 0.9781,
+      "step": 280
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 1.828894853591919,
+      "learning_rate": 4.9971836915315926e-05,
+      "loss": 0.8932,
+      "step": 285
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 1.239621877670288,
+      "learning_rate": 4.9970836722563256e-05,
+      "loss": 1.2022,
+      "step": 290
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 1.0117149353027344,
+      "learning_rate": 4.996981908813664e-05,
+      "loss": 0.8032,
+      "step": 295
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 0.8861119747161865,
+      "learning_rate": 4.996878401274687e-05,
+      "loss": 1.0651,
+      "step": 300
+    },
+    {
+      "epoch": 0.05,
+      "eval_loss": 0.8281473517417908,
+      "eval_runtime": 96.5283,
+      "eval_samples_per_second": 7.221,
+      "eval_steps_per_second": 7.221,
+      "step": 300
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 0.8583046197891235,
+      "learning_rate": 4.996773149711693e-05,
+      "loss": 0.8784,
+      "step": 305
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 2.5717499256134033,
+      "learning_rate": 4.9966661541981984e-05,
+      "loss": 0.8395,
+      "step": 310
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 0.982342004776001,
+      "learning_rate": 4.9965574148089376e-05,
+      "loss": 0.9869,
+      "step": 315
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 0.9000777006149292,
+      "learning_rate": 4.9964469316198633e-05,
+      "loss": 0.8435,
+      "step": 320
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 0.8733209371566772,
+      "learning_rate": 4.9963347047081464e-05,
+      "loss": 0.7281,
+      "step": 325
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 3.323739767074585,
+      "learning_rate": 4.9962207341521746e-05,
+      "loss": 1.1013,
+      "step": 330
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 1.7102876901626587,
+      "learning_rate": 4.996105020031554e-05,
+      "loss": 0.8276,
+      "step": 335
+    },
+    {
+      "epoch": 0.05,
+      "grad_norm": 0.9196123480796814,
+      "learning_rate": 4.995987562427109e-05,
+      "loss": 0.8274,
+      "step": 340
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 1.210099458694458,
+      "learning_rate": 4.995868361420883e-05,
+      "loss": 1.3257,
+      "step": 345
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 0.8923581838607788,
+      "learning_rate": 4.9957474170961335e-05,
+      "loss": 0.6815,
+      "step": 350
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 0.9576735496520996,
+      "learning_rate": 4.9956247295373396e-05,
+      "loss": 1.23,
+      "step": 355
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 1.3774089813232422,
+      "learning_rate": 4.995500298830196e-05,
+      "loss": 1.0556,
+      "step": 360
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 1.1523677110671997,
+      "learning_rate": 4.995374125061614e-05,
+      "loss": 1.1787,
+      "step": 365
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 0.8310608863830566,
+      "learning_rate": 4.9952462083197246e-05,
+      "loss": 0.8525,
+      "step": 370
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 0.9814196825027466,
+      "learning_rate": 4.9951165486938765e-05,
+      "loss": 0.8522,
+      "step": 375
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 0.9878122210502625,
+      "learning_rate": 4.994985146274633e-05,
+      "loss": 0.6618,
+      "step": 380
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 1.2652586698532104,
+      "learning_rate": 4.994852001153777e-05,
+      "loss": 1.0489,
+      "step": 385
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 1.2940975427627563,
+      "learning_rate": 4.994717113424307e-05,
+      "loss": 1.104,
+      "step": 390
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 0.9636249542236328,
+      "learning_rate": 4.99458048318044e-05,
+      "loss": 0.9228,
+      "step": 395
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 0.8122813105583191,
+      "learning_rate": 4.994442110517611e-05,
+      "loss": 0.9209,
+      "step": 400
+    },
+    {
+      "epoch": 0.06,
+      "eval_loss": 0.8184689879417419,
+      "eval_runtime": 96.4572,
+      "eval_samples_per_second": 7.226,
+      "eval_steps_per_second": 7.226,
+      "step": 400
+    },
+    {
+      "epoch": 0.06,
+      "grad_norm": 0.8742052912712097,
+      "learning_rate": 4.99430199553247e-05,
+      "loss": 0.9608,
+      "step": 405
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 0.5679522752761841,
+      "learning_rate": 4.9941601383228835e-05,
+      "loss": 0.5963,
+      "step": 410
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 1.0234627723693848,
+      "learning_rate": 4.994016538987938e-05,
+      "loss": 0.8642,
+      "step": 415
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 0.8581897616386414,
+      "learning_rate": 4.993871197627934e-05,
+      "loss": 0.8993,
+      "step": 420
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 1.4666485786437988,
+      "learning_rate": 4.9937241143443904e-05,
+      "loss": 0.8565,
+      "step": 425
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 1.1166578531265259,
+      "learning_rate": 4.993575289240041e-05,
+      "loss": 0.881,
+      "step": 430
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 1.303992748260498,
+      "learning_rate": 4.9934247224188393e-05,
+      "loss": 0.9962,
+      "step": 435
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 0.9011989235877991,
+      "learning_rate": 4.993272413985952e-05,
+      "loss": 0.9316,
+      "step": 440
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 0.8321458101272583,
+      "learning_rate": 4.993118364047764e-05,
+      "loss": 0.7889,
+      "step": 445
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 0.7780352234840393,
+      "learning_rate": 4.992962572711877e-05,
+      "loss": 0.8287,
+      "step": 450
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 0.9090210199356079,
+      "learning_rate": 4.992805040087108e-05,
+      "loss": 0.7018,
+      "step": 455
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 0.8694137334823608,
+      "learning_rate": 4.9926457662834906e-05,
+      "loss": 0.8484,
+      "step": 460
+    },
+    {
+      "epoch": 0.07,
+      "grad_norm": 0.6327371001243591,
+      "learning_rate": 4.992484751412274e-05,
+      "loss": 0.716,
+      "step": 465
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.200668215751648,
+      "learning_rate": 4.9923219955859254e-05,
+      "loss": 0.9525,
+      "step": 470
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.8530198931694031,
+      "learning_rate": 4.9921574989181266e-05,
+      "loss": 0.744,
+      "step": 475
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.168479323387146,
+      "learning_rate": 4.991991261523775e-05,
+      "loss": 0.729,
+      "step": 480
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.9499714970588684,
+      "learning_rate": 4.9918232835189834e-05,
+      "loss": 0.7725,
+      "step": 485
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.8434467911720276,
+      "learning_rate": 4.991653565021084e-05,
+      "loss": 1.1558,
+      "step": 490
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.7665804624557495,
+      "learning_rate": 4.99148210614862e-05,
+      "loss": 1.0208,
+      "step": 495
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.5782546401023865,
+      "learning_rate": 4.991308907021353e-05,
+      "loss": 0.8306,
+      "step": 500
+    },
+    {
+      "epoch": 0.08,
+      "eval_loss": 0.8132078051567078,
+      "eval_runtime": 96.433,
+      "eval_samples_per_second": 7.228,
+      "eval_steps_per_second": 7.228,
+      "step": 500
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.0821778774261475,
+      "learning_rate": 4.9911339677602584e-05,
+      "loss": 0.9503,
+      "step": 505
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.5409029126167297,
+      "learning_rate": 4.99095728848753e-05,
+      "loss": 0.8586,
+      "step": 510
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 0.9011789560317993,
+      "learning_rate": 4.990778869326575e-05,
+      "loss": 0.7981,
+      "step": 515
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.0092263221740723,
+      "learning_rate": 4.990598710402013e-05,
+      "loss": 1.0174,
+      "step": 520
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 1.4362307786941528,
+      "learning_rate": 4.9904168118396844e-05,
+      "loss": 0.8373,
+      "step": 525
+    },
+    {
+      "epoch": 0.08,
+      "grad_norm": 2.1772639751434326,
+      "learning_rate": 4.9902331737666414e-05,
+      "loss": 0.9599,
+      "step": 530
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 0.9610542058944702,
+      "learning_rate": 4.990047796311151e-05,
+      "loss": 0.6895,
+      "step": 535
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 0.9922348260879517,
+      "learning_rate": 4.989860679602698e-05,
+      "loss": 0.7315,
+      "step": 540
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 1.2409151792526245,
+      "learning_rate": 4.9896718237719785e-05,
+      "loss": 0.8574,
+      "step": 545
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 1.016333818435669,
+      "learning_rate": 4.9894812289509046e-05,
+      "loss": 1.1248,
+      "step": 550
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 0.9131489396095276,
+      "learning_rate": 4.989288895272604e-05,
+      "loss": 0.9847,
+      "step": 555
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 1.215469479560852,
+      "learning_rate": 4.989094822871419e-05,
+      "loss": 0.912,
+      "step": 560
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 1.0536105632781982,
+      "learning_rate": 4.988899011882903e-05,
+      "loss": 0.8425,
+      "step": 565
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 1.9705311059951782,
+      "learning_rate": 4.988701462443829e-05,
+      "loss": 0.9385,
+      "step": 570
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 1.2488442659378052,
+      "learning_rate": 4.98850217469218e-05,
+      "loss": 0.7865,
+      "step": 575
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 1.7318600416183472,
+      "learning_rate": 4.988301148767157e-05,
+      "loss": 0.8231,
+      "step": 580
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 0.8247858881950378,
+      "learning_rate": 4.9880983848091704e-05,
+      "loss": 0.8553,
+      "step": 585
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 0.858172595500946,
+      "learning_rate": 4.987893882959849e-05,
+      "loss": 1.3952,
+      "step": 590
+    },
+    {
+      "epoch": 0.09,
+      "grad_norm": 1.2286418676376343,
+      "learning_rate": 4.987687643362033e-05,
+      "loss": 0.837,
+      "step": 595
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 1.034350872039795,
+      "learning_rate": 4.9874796661597765e-05,
+      "loss": 0.9175,
+      "step": 600
+    },
+    {
+      "epoch": 0.1,
+      "eval_loss": 0.8063747882843018,
+      "eval_runtime": 96.4224,
+      "eval_samples_per_second": 7.229,
+      "eval_steps_per_second": 7.229,
+      "step": 600
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 0.7192366123199463,
+      "learning_rate": 4.987269951498348e-05,
+      "loss": 0.8563,
+      "step": 605
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 1.2645854949951172,
+      "learning_rate": 4.98705849952423e-05,
+      "loss": 0.6663,
+      "step": 610
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 1.0610381364822388,
+      "learning_rate": 4.9868453103851176e-05,
+      "loss": 0.8452,
+      "step": 615
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 0.8550002574920654,
+      "learning_rate": 4.986630384229919e-05,
+      "loss": 0.8894,
+      "step": 620
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 0.7490519285202026,
+      "learning_rate": 4.986413721208757e-05,
+      "loss": 0.9106,
+      "step": 625
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 0.557860255241394,
+      "learning_rate": 4.986195321472965e-05,
+      "loss": 0.685,
+      "step": 630
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 0.7450752258300781,
+      "learning_rate": 4.9859751851750934e-05,
+      "loss": 0.8472,
+      "step": 635
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 1.176376461982727,
+      "learning_rate": 4.985753312468903e-05,
+      "loss": 1.0197,
+      "step": 640
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 1.0625300407409668,
+      "learning_rate": 4.985529703509367e-05,
+      "loss": 0.9685,
+      "step": 645
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 0.8808372616767883,
+      "learning_rate": 4.985304358452672e-05,
+      "loss": 0.8612,
+      "step": 650
+    },
+    {
+      "epoch": 0.1,
+      "grad_norm": 0.8110201954841614,
+      "learning_rate": 4.985077277456218e-05,
+      "loss": 0.8401,
+      "step": 655
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 0.9364888072013855,
+      "learning_rate": 4.984848460678618e-05,
+      "loss": 0.6197,
+      "step": 660
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 1.0113518238067627,
+      "learning_rate": 4.984617908279694e-05,
+      "loss": 0.9889,
+      "step": 665
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 1.1148868799209595,
+      "learning_rate": 4.984385620420485e-05,
+      "loss": 0.9558,
+      "step": 670
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 0.9506175518035889,
+      "learning_rate": 4.984151597263238e-05,
+      "loss": 0.7323,
+      "step": 675
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 1.0044193267822266,
+      "learning_rate": 4.983915838971415e-05,
+      "loss": 0.7504,
+      "step": 680
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 2.2674214839935303,
+      "learning_rate": 4.9836783457096875e-05,
+      "loss": 1.032,
+      "step": 685
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 1.4945333003997803,
+      "learning_rate": 4.983439117643942e-05,
+      "loss": 1.0359,
+      "step": 690
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 0.9860715866088867,
+      "learning_rate": 4.9831981549412744e-05,
+      "loss": 1.1152,
+      "step": 695
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 0.8287227153778076,
+      "learning_rate": 4.982955457769992e-05,
+      "loss": 0.8157,
+      "step": 700
+    },
+    {
+      "epoch": 0.11,
+      "eval_loss": 0.8022791743278503,
+      "eval_runtime": 96.5324,
+      "eval_samples_per_second": 7.22,
+      "eval_steps_per_second": 7.22,
+      "step": 700
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 0.9216273427009583,
+      "learning_rate": 4.9827110262996144e-05,
+      "loss": 0.8395,
+      "step": 705
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 0.7642357349395752,
+      "learning_rate": 4.982464860700874e-05,
+      "loss": 0.8817,
+      "step": 710
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 0.8851175308227539,
+      "learning_rate": 4.982216961145711e-05,
+      "loss": 0.8558,
+      "step": 715
+    },
+    {
+      "epoch": 0.11,
+      "grad_norm": 0.44226109981536865,
+      "learning_rate": 4.98196732780728e-05,
+      "loss": 0.882,
+      "step": 720
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 0.8005027174949646,
+      "learning_rate": 4.981715960859945e-05,
+      "loss": 0.8835,
+      "step": 725
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 0.7451304793357849,
+      "learning_rate": 4.981462860479281e-05,
+      "loss": 0.8551,
+      "step": 730
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 1.1069347858428955,
+      "learning_rate": 4.9812080268420745e-05,
+      "loss": 0.999,
+      "step": 735
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 0.8892244100570679,
+      "learning_rate": 4.980951460126322e-05,
+      "loss": 1.012,
+      "step": 740
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 0.8935977816581726,
+      "learning_rate": 4.9806931605112305e-05,
+      "loss": 0.9911,
+      "step": 745
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 0.8456961512565613,
+      "learning_rate": 4.9804331281772176e-05,
+      "loss": 0.7595,
+      "step": 750
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 0.78443443775177,
+      "learning_rate": 4.980171363305911e-05,
+      "loss": 0.8308,
+      "step": 755
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 1.0028038024902344,
+      "learning_rate": 4.979907866080149e-05,
+      "loss": 0.9637,
+      "step": 760
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 1.1801577806472778,
+      "learning_rate": 4.9796426366839786e-05,
+      "loss": 0.6159,
+      "step": 765
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 0.8370681405067444,
+      "learning_rate": 4.979375675302659e-05,
+      "loss": 0.9276,
+      "step": 770
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 0.8605382442474365,
+      "learning_rate": 4.979106982122658e-05,
+      "loss": 1.1077,
+      "step": 775
+    },
+    {
+      "epoch": 0.12,
+      "grad_norm": 0.7788259387016296,
+      "learning_rate": 4.978836557331652e-05,
+      "loss": 0.8172,
+      "step": 780
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 1.4312686920166016,
+      "learning_rate": 4.978564401118528e-05,
+      "loss": 0.8759,
+      "step": 785
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 0.9109662175178528,
+      "learning_rate": 4.978290513673381e-05,
+      "loss": 0.947,
+      "step": 790
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 1.1819065809249878,
+      "learning_rate": 4.9780148951875195e-05,
+      "loss": 0.7364,
+      "step": 795
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 0.9400575160980225,
+      "learning_rate": 4.977737545853455e-05,
+      "loss": 0.9469,
+      "step": 800
+    },
+    {
+      "epoch": 0.13,
+      "eval_loss": 0.7995806932449341,
+      "eval_runtime": 96.5877,
+      "eval_samples_per_second": 7.216,
+      "eval_steps_per_second": 7.216,
+      "step": 800
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 1.693812370300293,
+      "learning_rate": 4.9774584658649126e-05,
+      "loss": 0.9433,
+      "step": 805
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 1.0892895460128784,
+      "learning_rate": 4.9771776554168234e-05,
+      "loss": 0.7027,
+      "step": 810
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 0.9118362665176392,
+      "learning_rate": 4.976895114705329e-05,
+      "loss": 0.9468,
+      "step": 815
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 0.8032681345939636,
+      "learning_rate": 4.976610843927779e-05,
+      "loss": 0.7927,
+      "step": 820
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 1.168225646018982,
+      "learning_rate": 4.976324843282732e-05,
+      "loss": 0.9673,
+      "step": 825
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 1.077602744102478,
+      "learning_rate": 4.976037112969953e-05,
+      "loss": 0.9156,
+      "step": 830
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 0.8643108606338501,
+      "learning_rate": 4.9757476531904165e-05,
+      "loss": 0.6999,
+      "step": 835
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 0.933397650718689,
+      "learning_rate": 4.975456464146306e-05,
+      "loss": 0.8828,
+      "step": 840
+    },
+    {
+      "epoch": 0.13,
+      "grad_norm": 0.7036295533180237,
+      "learning_rate": 4.975163546041011e-05,
+      "loss": 0.8709,
+      "step": 845
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 0.5974694490432739,
+      "learning_rate": 4.974868899079128e-05,
+      "loss": 0.7594,
+      "step": 850
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 0.7244943380355835,
+      "learning_rate": 4.974572523466465e-05,
+      "loss": 0.8714,
+      "step": 855
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 0.5783522725105286,
+      "learning_rate": 4.9742744194100345e-05,
+      "loss": 0.8941,
+      "step": 860
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 0.7480617761611938,
+      "learning_rate": 4.973974587118055e-05,
+      "loss": 0.9798,
+      "step": 865
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 0.7548874020576477,
+      "learning_rate": 4.973673026799956e-05,
+      "loss": 0.7767,
+      "step": 870
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 0.7075071930885315,
+      "learning_rate": 4.97336973866637e-05,
+      "loss": 0.7779,
+      "step": 875
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 0.7042987942695618,
+      "learning_rate": 4.97306472292914e-05,
+      "loss": 0.8249,
+      "step": 880
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 1.0242459774017334,
+      "learning_rate": 4.972757979801313e-05,
+      "loss": 0.9223,
+      "step": 885
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 0.6138095259666443,
+      "learning_rate": 4.9724495094971436e-05,
+      "loss": 0.9842,
+      "step": 890
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 0.7905042767524719,
+      "learning_rate": 4.9721393122320925e-05,
+      "loss": 0.8738,
+      "step": 895
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 0.9658048748970032,
+      "learning_rate": 4.9718273882228265e-05,
+      "loss": 0.8872,
+      "step": 900
+    },
+    {
+      "epoch": 0.14,
+      "eval_loss": 0.7954564690589905,
+      "eval_runtime": 96.643,
+      "eval_samples_per_second": 7.212,
+      "eval_steps_per_second": 7.212,
+      "step": 900
+    },
+    {
+      "epoch": 0.14,
+      "grad_norm": 0.8425014019012451,
+      "learning_rate": 4.97151373768722e-05,
+      "loss": 0.778,
+      "step": 905
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 0.5527231693267822,
+      "learning_rate": 4.971198360844351e-05,
+      "loss": 0.8332,
+      "step": 910
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 0.7870334386825562,
+      "learning_rate": 4.9708812579145056e-05,
+      "loss": 0.9265,
+      "step": 915
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 0.9935321807861328,
+      "learning_rate": 4.970562429119173e-05,
+      "loss": 0.7243,
+      "step": 920
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 0.9546892046928406,
+      "learning_rate": 4.970241874681051e-05,
+      "loss": 0.9908,
+      "step": 925
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 0.7340118885040283,
+      "learning_rate": 4.969919594824039e-05,
+      "loss": 0.7932,
+      "step": 930
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 5.1686015129089355,
+      "learning_rate": 4.9695955897732453e-05,
+      "loss": 0.9842,
+      "step": 935
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 0.9721456170082092,
+      "learning_rate": 4.9692698597549815e-05,
+      "loss": 0.9271,
+      "step": 940
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 0.6477334499359131,
+      "learning_rate": 4.9689424049967623e-05,
+      "loss": 0.934,
+      "step": 945
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 1.0759055614471436,
+      "learning_rate": 4.968613225727311e-05,
+      "loss": 1.0465,
+      "step": 950
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 0.7222158908843994,
+      "learning_rate": 4.968282322176552e-05,
+      "loss": 0.7732,
+      "step": 955
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 0.8591343760490417,
+      "learning_rate": 4.9679496945756155e-05,
+      "loss": 0.9062,
+      "step": 960
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 1.8495111465454102,
+      "learning_rate": 4.967615343156837e-05,
+      "loss": 0.8861,
+      "step": 965
+    },
+    {
+      "epoch": 0.15,
+      "grad_norm": 0.6847331523895264,
+      "learning_rate": 4.967279268153753e-05,
+      "loss": 0.8001,
+      "step": 970
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.690113365650177,
+      "learning_rate": 4.9669414698011074e-05,
+      "loss": 0.7378,
+      "step": 975
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.8349626064300537,
+      "learning_rate": 4.9666019483348456e-05,
+      "loss": 0.7193,
+      "step": 980
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.6444108486175537,
+      "learning_rate": 4.966260703992116e-05,
+      "loss": 0.8729,
+      "step": 985
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.9515655040740967,
+      "learning_rate": 4.965917737011274e-05,
+      "loss": 0.7532,
+      "step": 990
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 0.8138986229896545,
+      "learning_rate": 4.965573047631873e-05,
+      "loss": 1.0124,
+      "step": 995
+    },
+    {
+      "epoch": 0.16,
+      "grad_norm": 1.0182080268859863,
+      "learning_rate": 4.9652266360946745e-05,
+      "loss": 0.8842,
+      "step": 1000
+    },
+    {
+      "epoch": 0.16,
+      "eval_loss": 0.7912728190422058,
+      "eval_runtime": 96.5004,
+      "eval_samples_per_second": 7.223,
+      "eval_steps_per_second": 7.223,
+      "step": 1000
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 18795,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "total_flos": 4.3155317587968e+16,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-1000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aba1ff1fdda3287196c6ef142366a8ee27aa213077a93b2e39492dd1603ae72f
+size 5048

checkpoint-1000/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-10000/README.md ADDED Viewed

	@@ -0,0 +1,204 @@

+---
+library_name: peft
+base_model: bigcode/starcoder2-7b
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.9.0

checkpoint-10000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder2-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.15,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_fc",
+    "o_proj",
+    "q_proj",
+    "v_proj",
+    "k_proj",
+    "c_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-10000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b69b187b6aed04aa1dee26fa2c5a53a3703730adbc1241ba98d35e7b14bbcf0
+size 306235552

checkpoint-10000/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-10000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:97a2e05763c3411f9f0e9b159d66c459c6c0753f6c4de5affec3a7ad0cb37acf
+size 612692114

checkpoint-10000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ad759fd9a3102b74e5668b6462bee2ef2882f610228e5d30d85770644c87369
+size 14244

checkpoint-10000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f14fe74239f29afb85e174adc4b5e87cdbe8d520f9eaece0a2fdea90450a5bdf
+size 1064

checkpoint-10000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<fim_prefix>",
+    "<fim_middle>",
+    "<fim_suffix>",
+    "<fim_pad>",
+    "<repo_name>",
+    "<file_sep>",
+    "<issue_start>",
+    "<issue_comment>",
+    "<issue_closed>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<jupyter_script>",
+    "<empty_output>",
+    "<code_to_intermediate>",
+    "<intermediate_to_code>",
+    "<pr>",
+    "<pr_status>",
+    "<pr_is_merged>",
+    "<pr_base>",
+    "<pr_file>",
+    "<pr_base_code>",
+    "<pr_diff>",
+    "<pr_diff_hunk>",
+    "<pr_comment>",
+    "<pr_event_id>",
+    "<pr_review>",
+    "<pr_review_state>",
+    "<pr_review_comment>",
+    "<pr_in_reply_to_review_id>",
+    "<pr_in_reply_to_comment_id>",
+    "<pr_diff_hunk_comment_line>",
+    "<NAME>",
+    "<EMAIL>",
+    "<KEY>",
+    "<PASSWORD>"
+  ],
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-10000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,361 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<fim_prefix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<fim_middle>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<fim_suffix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<fim_pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<repo_name>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<file_sep>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<jupyter_script>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<empty_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<code_to_intermediate>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "<intermediate_to_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "<pr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "19": {
+      "content": "<pr_status>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "20": {
+      "content": "<pr_is_merged>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "21": {
+      "content": "<pr_base>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "22": {
+      "content": "<pr_file>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "23": {
+      "content": "<pr_base_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "24": {
+      "content": "<pr_diff>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "25": {
+      "content": "<pr_diff_hunk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "26": {
+      "content": "<pr_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "27": {
+      "content": "<pr_event_id>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "28": {
+      "content": "<pr_review>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "29": {
+      "content": "<pr_review_state>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30": {
+      "content": "<pr_review_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "31": {
+      "content": "<pr_in_reply_to_review_id>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32": {
+      "content": "<pr_in_reply_to_comment_id>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "33": {
+      "content": "<pr_diff_hunk_comment_line>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "34": {
+      "content": "<NAME>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "35": {
+      "content": "<EMAIL>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "36": {
+      "content": "<KEY>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "37": {
+      "content": "<PASSWORD>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<fim_prefix>",
+    "<fim_middle>",
+    "<fim_suffix>",
+    "<fim_pad>",
+    "<repo_name>",
+    "<file_sep>",
+    "<issue_start>",
+    "<issue_comment>",
+    "<issue_closed>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<jupyter_script>",
+    "<empty_output>",
+    "<code_to_intermediate>",
+    "<intermediate_to_code>",
+    "<pr>",
+    "<pr_status>",
+    "<pr_is_merged>",
+    "<pr_base>",
+    "<pr_file>",
+    "<pr_base_code>",
+    "<pr_diff>",
+    "<pr_diff_hunk>",
+    "<pr_comment>",
+    "<pr_event_id>",
+    "<pr_review>",
+    "<pr_review_state>",
+    "<pr_review_comment>",
+    "<pr_in_reply_to_review_id>",
+    "<pr_in_reply_to_comment_id>",
+    "<pr_diff_hunk_comment_line>",
+    "<NAME>",
+    "<EMAIL>",
+    "<KEY>",
+    "<PASSWORD>"
+  ],
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>",
+  "vocab_size": 49152
+}

checkpoint-10000/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-10000/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aba1ff1fdda3287196c6ef142366a8ee27aa213077a93b2e39492dd1603ae72f
+size 5048

checkpoint-10000/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-10500/README.md ADDED Viewed

	@@ -0,0 +1,204 @@

+---
+library_name: peft
+base_model: bigcode/starcoder2-7b
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.9.0

checkpoint-10500/adapter_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder2-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.15,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_fc",
+    "o_proj",
+    "q_proj",
+    "v_proj",
+    "k_proj",
+    "c_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-10500/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8a24f1b860c0718a0f599e20cfcde60dba4e6264959675f3b050fd71f6424047
+size 306235552

checkpoint-10500/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-10500/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c83ba95cd90b11b3f8bdfa26bd0eaf2475e8e2a9b6427b1a2f127eeaba7e2f0d
+size 612692114

checkpoint-10500/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ebc4d68c6ba09b134d762ec466777351973214f1a16cc6cd31114ef38347d4ae
+size 14244

checkpoint-10500/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a0ae137ef9ab2f88c67af62bdf6ab3d615076a309f9c848b2969ed4df1a7b1d6
+size 1064

checkpoint-10500/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<fim_prefix>",
+    "<fim_middle>",
+    "<fim_suffix>",
+    "<fim_pad>",
+    "<repo_name>",
+    "<file_sep>",
+    "<issue_start>",
+    "<issue_comment>",
+    "<issue_closed>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<jupyter_script>",
+    "<empty_output>",
+    "<code_to_intermediate>",
+    "<intermediate_to_code>",
+    "<pr>",
+    "<pr_status>",
+    "<pr_is_merged>",
+    "<pr_base>",
+    "<pr_file>",
+    "<pr_base_code>",
+    "<pr_diff>",
+    "<pr_diff_hunk>",
+    "<pr_comment>",
+    "<pr_event_id>",
+    "<pr_review>",
+    "<pr_review_state>",
+    "<pr_review_comment>",
+    "<pr_in_reply_to_review_id>",
+    "<pr_in_reply_to_comment_id>",
+    "<pr_diff_hunk_comment_line>",
+    "<NAME>",
+    "<EMAIL>",
+    "<KEY>",
+    "<PASSWORD>"
+  ],
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-10500/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,361 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<fim_prefix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<fim_middle>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<fim_suffix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<fim_pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<repo_name>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<file_sep>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<jupyter_script>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<empty_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<code_to_intermediate>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "<intermediate_to_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "<pr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "19": {
+      "content": "<pr_status>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "20": {
+      "content": "<pr_is_merged>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "21": {
+      "content": "<pr_base>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "22": {
+      "content": "<pr_file>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "23": {
+      "content": "<pr_base_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "24": {
+      "content": "<pr_diff>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "25": {
+      "content": "<pr_diff_hunk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "26": {
+      "content": "<pr_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "27": {
+      "content": "<pr_event_id>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "28": {
+      "content": "<pr_review>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "29": {
+      "content": "<pr_review_state>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30": {
+      "content": "<pr_review_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "31": {
+      "content": "<pr_in_reply_to_review_id>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32": {
+      "content": "<pr_in_reply_to_comment_id>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "33": {
+      "content": "<pr_diff_hunk_comment_line>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "34": {
+      "content": "<NAME>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "35": {
+      "content": "<EMAIL>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "36": {
+      "content": "<KEY>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "37": {
+      "content": "<PASSWORD>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<fim_prefix>",
+    "<fim_middle>",
+    "<fim_suffix>",
+    "<fim_pad>",
+    "<repo_name>",
+    "<file_sep>",
+    "<issue_start>",
+    "<issue_comment>",
+    "<issue_closed>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<jupyter_script>",
+    "<empty_output>",
+    "<code_to_intermediate>",
+    "<intermediate_to_code>",
+    "<pr>",
+    "<pr_status>",
+    "<pr_is_merged>",
+    "<pr_base>",
+    "<pr_file>",
+    "<pr_base_code>",
+    "<pr_diff>",
+    "<pr_diff_hunk>",
+    "<pr_comment>",
+    "<pr_event_id>",
+    "<pr_review>",
+    "<pr_review_state>",
+    "<pr_review_comment>",
+    "<pr_in_reply_to_review_id>",
+    "<pr_in_reply_to_comment_id>",
+    "<pr_diff_hunk_comment_line>",
+    "<NAME>",
+    "<EMAIL>",
+    "<KEY>",
+    "<PASSWORD>"
+  ],
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>",
+  "vocab_size": 49152
+}

checkpoint-10500/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-10500/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aba1ff1fdda3287196c6ef142366a8ee27aa213077a93b2e39492dd1603ae72f
+size 5048

checkpoint-10500/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-11000/README.md ADDED Viewed

	@@ -0,0 +1,204 @@

+---
+library_name: peft
+base_model: bigcode/starcoder2-7b
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.9.0

checkpoint-11000/adapter_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "bigcode/starcoder2-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 64,
+  "lora_dropout": 0.15,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "c_fc",
+    "o_proj",
+    "q_proj",
+    "v_proj",
+    "k_proj",
+    "c_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-11000/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9019013901b60deee9f2f66de29e225413c7311fe2715374be32987436a4e8e8
+size 306235552

checkpoint-11000/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-11000/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3fdb206d0d46198a2c519e1c9cd4fffe525cfd4c1af825722561b1213a4ea7e6
+size 612692114

checkpoint-11000/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3e03cd57181c93286c55a2285d990849817b30e7f22c295a8fc298e711a5bf77
+size 14244

checkpoint-11000/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0e2ae236fbb9e6aa2493215f88876392eb92278816ada028bd155f1921609e2
+size 1064

checkpoint-11000/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,64 @@

+{
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<fim_prefix>",
+    "<fim_middle>",
+    "<fim_suffix>",
+    "<fim_pad>",
+    "<repo_name>",
+    "<file_sep>",
+    "<issue_start>",
+    "<issue_comment>",
+    "<issue_closed>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<jupyter_script>",
+    "<empty_output>",
+    "<code_to_intermediate>",
+    "<intermediate_to_code>",
+    "<pr>",
+    "<pr_status>",
+    "<pr_is_merged>",
+    "<pr_base>",
+    "<pr_file>",
+    "<pr_base_code>",
+    "<pr_diff>",
+    "<pr_diff_hunk>",
+    "<pr_comment>",
+    "<pr_event_id>",
+    "<pr_review>",
+    "<pr_review_state>",
+    "<pr_review_comment>",
+    "<pr_in_reply_to_review_id>",
+    "<pr_in_reply_to_comment_id>",
+    "<pr_diff_hunk_comment_line>",
+    "<NAME>",
+    "<EMAIL>",
+    "<KEY>",
+    "<PASSWORD>"
+  ],
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-11000/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,361 @@

+{
+  "add_bos_token": false,
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<fim_prefix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<fim_middle>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<fim_suffix>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "<fim_pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "5": {
+      "content": "<repo_name>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "6": {
+      "content": "<file_sep>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "7": {
+      "content": "<issue_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "8": {
+      "content": "<issue_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "9": {
+      "content": "<issue_closed>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "10": {
+      "content": "<jupyter_start>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "11": {
+      "content": "<jupyter_text>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "12": {
+      "content": "<jupyter_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "13": {
+      "content": "<jupyter_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "14": {
+      "content": "<jupyter_script>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "15": {
+      "content": "<empty_output>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "16": {
+      "content": "<code_to_intermediate>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "17": {
+      "content": "<intermediate_to_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "18": {
+      "content": "<pr>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "19": {
+      "content": "<pr_status>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "20": {
+      "content": "<pr_is_merged>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "21": {
+      "content": "<pr_base>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "22": {
+      "content": "<pr_file>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "23": {
+      "content": "<pr_base_code>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "24": {
+      "content": "<pr_diff>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "25": {
+      "content": "<pr_diff_hunk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "26": {
+      "content": "<pr_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "27": {
+      "content": "<pr_event_id>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "28": {
+      "content": "<pr_review>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "29": {
+      "content": "<pr_review_state>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30": {
+      "content": "<pr_review_comment>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "31": {
+      "content": "<pr_in_reply_to_review_id>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32": {
+      "content": "<pr_in_reply_to_comment_id>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "33": {
+      "content": "<pr_diff_hunk_comment_line>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "34": {
+      "content": "<NAME>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "35": {
+      "content": "<EMAIL>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "36": {
+      "content": "<KEY>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "37": {
+      "content": "<PASSWORD>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<|endoftext|>",
+    "<fim_prefix>",
+    "<fim_middle>",
+    "<fim_suffix>",
+    "<fim_pad>",
+    "<repo_name>",
+    "<file_sep>",
+    "<issue_start>",
+    "<issue_comment>",
+    "<issue_closed>",
+    "<jupyter_start>",
+    "<jupyter_text>",
+    "<jupyter_code>",
+    "<jupyter_output>",
+    "<jupyter_script>",
+    "<empty_output>",
+    "<code_to_intermediate>",
+    "<intermediate_to_code>",
+    "<pr>",
+    "<pr_status>",
+    "<pr_is_merged>",
+    "<pr_base>",
+    "<pr_file>",
+    "<pr_base_code>",
+    "<pr_diff>",
+    "<pr_diff_hunk>",
+    "<pr_comment>",
+    "<pr_event_id>",
+    "<pr_review>",
+    "<pr_review_state>",
+    "<pr_review_comment>",
+    "<pr_in_reply_to_review_id>",
+    "<pr_in_reply_to_comment_id>",
+    "<pr_diff_hunk_comment_line>",
+    "<NAME>",
+    "<EMAIL>",
+    "<KEY>",
+    "<PASSWORD>"
+  ],
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<|endoftext|>",
+  "padding_side": "right",
+  "split_special_tokens": false,
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>",
+  "vocab_size": 49152
+}

checkpoint-11000/trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff