Wasody committed on Apr 11

Commit

cd3ccd0

verified ·

1 Parent(s): 3a58403

Upload folder using huggingface_hub

Browse files

Files changed (22) hide show

README.md +127 -0
adapter_config.json +37 -0
adapter_model.safetensors +3 -0
added_tokens.json +3 -0
checkpoint-63/README.md +202 -0
checkpoint-63/adapter_config.json +37 -0
checkpoint-63/adapter_model.safetensors +3 -0
checkpoint-63/added_tokens.json +3 -0
checkpoint-63/optimizer.pt +3 -0
checkpoint-63/rng_state.pth +3 -0
checkpoint-63/scheduler.pt +3 -0
checkpoint-63/special_tokens_map.json +30 -0
checkpoint-63/tokenizer.json +0 -0
checkpoint-63/tokenizer.model +3 -0
checkpoint-63/tokenizer_config.json +50 -0
checkpoint-63/trainer_state.json +474 -0
checkpoint-63/training_args.bin +3 -0
config.json +47 -0
special_tokens_map.json +30 -0
tokenizer.json +0 -0
tokenizer.model +3 -0
tokenizer_config.json +50 -0

README.md ADDED Viewed

	@@ -0,0 +1,127 @@

+---
+library_name: peft
+license: other
+base_model: Austism/chronos-hermes-13b-v2
+tags:
+- generated_from_trainer
+datasets:
+- Wasody/i_dont_know
+model-index:
+- name: outputs/mymodel
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
+<details><summary>See axolotl config</summary>
+axolotl version: `0.8.0.dev0`
+```yaml
+adapter: lora
+base_model: Austism/chronos-hermes-13b-v2
+bf16: auto
+dataset_processes: 32
+datasets:
+  - path: Wasody/i_dont_know
+    type: alpaca
+gradient_accumulation_steps: 1
+gradient_checkpointing: false
+learning_rate: 0.0002
+lisa_layers_attribute: model.layers
+load_best_model_at_end: false
+load_in_4bit: false
+load_in_8bit: true
+lora_alpha: 16
+lora_dropout: 0.05
+lora_r: 8
+lora_target_modules:
+- q_proj
+- v_proj
+- k_proj
+- o_proj
+- gate_proj
+- down_proj
+- up_proj
+loraplus_lr_embedding: 1.0e-06
+lr_scheduler: cosine
+max_prompt_len: 512
+mean_resizing_embeddings: false
+micro_batch_size: 16
+num_epochs: 1.0
+optimizer: adamw_bnb_8bit
+output_dir: ./outputs/mymodel
+pretrain_multipack_attn: true
+pretrain_multipack_buffer_size: 10000
+qlora_sharded_model_loading: false
+ray_num_workers: 1
+resources_per_worker:
+  GPU: 1
+sample_packing_bin_size: 200
+sample_packing_group_size: 100000
+save_only_model: false
+save_safetensors: true
+sequence_len: 4096
+shuffle_merged_datasets: true
+skip_prepare_dataset: false
+strict: false
+train_on_inputs: false
+trl:
+  log_completions: false
+  ref_model_mixup_alpha: 0.9
+  ref_model_sync_steps: 64
+  sync_ref_model: false
+  use_vllm: false
+  vllm_device: auto
+  vllm_dtype: auto
+  vllm_gpu_memory_utilization: 0.9
+use_ray: false
+val_set_size: 0.0
+weight_decay: 0.0
+```
+</details><br>
+# outputs/mymodel
+This model is a fine-tuned version of [Austism/chronos-hermes-13b-v2](https://huggingface.co/Austism/chronos-hermes-13b-v2) on the Wasody/i_dont_know dataset.
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0002
+- train_batch_size: 16
+- eval_batch_size: 16
+- seed: 42
+- optimizer: OptimizerNames.ADAMW_BNB with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_steps: 2
+- num_epochs: 1.0
+### Training results
+### Framework versions
+- PEFT 0.14.0
+- Transformers 4.49.0
+- Pytorch 2.5.1+cu124
+- Datasets 3.2.0
+- Tokenizers 0.21.0

adapter_config.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Austism/chronos-hermes-13b-v2",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "o_proj",
+    "gate_proj",
+    "q_proj",
+    "k_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d7b6031f3bf623dbbbc1aa379e42c0d5beb4987687835e46a4de47afe9135d2
+size 125248064

added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<pad>": 32000
+}

checkpoint-63/README.md ADDED Viewed

	@@ -0,0 +1,202 @@

+---
+base_model: Austism/chronos-hermes-13b-v2
+library_name: peft
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.14.0

checkpoint-63/adapter_config.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "Austism/chronos-hermes-13b-v2",
+  "bias": "none",
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": null,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "down_proj",
+    "o_proj",
+    "gate_proj",
+    "q_proj",
+    "k_proj",
+    "up_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoint-63/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6d7b6031f3bf623dbbbc1aa379e42c0d5beb4987687835e46a4de47afe9135d2
+size 125248064

checkpoint-63/added_tokens.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "<pad>": 32000
+}

checkpoint-63/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ba7f9e2030c3a20969903b0a5b7afbca0201bd6c77f4c5550f78f66b9e638a53
+size 64219860

checkpoint-63/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f63830503ec412fa6d567aeb42bbd639a8844cbf104edb2b2dbae3bc1984d1c6
+size 14244

checkpoint-63/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14cdd42bc68f5478d2d19f880fd75836186d87f9353155a18cf15c074c963eab
+size 1064

checkpoint-63/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoint-63/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-63/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

checkpoint-63/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,50 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<unk>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}

checkpoint-63/trainer_state.json ADDED Viewed

	@@ -0,0 +1,474 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 63,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.015873015873015872,
+      "grad_norm": 4.7454609870910645,
+      "learning_rate": 0.0001,
+      "loss": 2.5054,
+      "step": 1
+    },
+    {
+      "epoch": 0.031746031746031744,
+      "grad_norm": 4.466691017150879,
+      "learning_rate": 0.0002,
+      "loss": 2.2887,
+      "step": 2
+    },
+    {
+      "epoch": 0.047619047619047616,
+      "grad_norm": 5.193768501281738,
+      "learning_rate": 0.00019986740898848306,
+      "loss": 2.6748,
+      "step": 3
+    },
+    {
+      "epoch": 0.06349206349206349,
+      "grad_norm": 4.952482223510742,
+      "learning_rate": 0.0001994699875614589,
+      "loss": 0.9811,
+      "step": 4
+    },
+    {
+      "epoch": 0.07936507936507936,
+      "grad_norm": 1.4735137224197388,
+      "learning_rate": 0.00019880878960910772,
+      "loss": 0.1203,
+      "step": 5
+    },
+    {
+      "epoch": 0.09523809523809523,
+      "grad_norm": 0.16856016218662262,
+      "learning_rate": 0.0001978855685095358,
+      "loss": 0.0129,
+      "step": 6
+    },
+    {
+      "epoch": 0.1111111111111111,
+      "grad_norm": 0.10518310964107513,
+      "learning_rate": 0.00019670277247913205,
+      "loss": 0.0062,
+      "step": 7
+    },
+    {
+      "epoch": 0.12698412698412698,
+      "grad_norm": 0.046044230461120605,
+      "learning_rate": 0.00019526353808033825,
+      "loss": 0.003,
+      "step": 8
+    },
+    {
+      "epoch": 0.14285714285714285,
+      "grad_norm": 0.028279926627874374,
+      "learning_rate": 0.00019357168190404936,
+      "loss": 0.0018,
+      "step": 9
+    },
+    {
+      "epoch": 0.15873015873015872,
+      "grad_norm": 0.019267423078417778,
+      "learning_rate": 0.0001916316904487005,
+      "loss": 0.0011,
+      "step": 10
+    },
+    {
+      "epoch": 0.1746031746031746,
+      "grad_norm": 0.01724287122488022,
+      "learning_rate": 0.00018944870822287956,
+      "loss": 0.0009,
+      "step": 11
+    },
+    {
+      "epoch": 0.19047619047619047,
+      "grad_norm": 0.01141897588968277,
+      "learning_rate": 0.00018702852410301554,
+      "loss": 0.0006,
+      "step": 12
+    },
+    {
+      "epoch": 0.20634920634920634,
+      "grad_norm": 0.008453724905848503,
+      "learning_rate": 0.00018437755598231856,
+      "loss": 0.0004,
+      "step": 13
+    },
+    {
+      "epoch": 0.2222222222222222,
+      "grad_norm": 0.0063788313418626785,
+      "learning_rate": 0.00018150283375168114,
+      "loss": 0.0003,
+      "step": 14
+    },
+    {
+      "epoch": 0.23809523809523808,
+      "grad_norm": 0.0052197095938026905,
+      "learning_rate": 0.00017841198065767107,
+      "loss": 0.0002,
+      "step": 15
+    },
+    {
+      "epoch": 0.25396825396825395,
+      "grad_norm": 0.00616695499047637,
+      "learning_rate": 0.00017511319308705198,
+      "loss": 0.0002,
+      "step": 16
+    },
+    {
+      "epoch": 0.2698412698412698,
+      "grad_norm": 0.005003658588975668,
+      "learning_rate": 0.00017161521883143934,
+      "loss": 0.0002,
+      "step": 17
+    },
+    {
+      "epoch": 0.2857142857142857,
+      "grad_norm": 0.004550958517938852,
+      "learning_rate": 0.00016792733388972932,
+      "loss": 0.0001,
+      "step": 18
+    },
+    {
+      "epoch": 0.30158730158730157,
+      "grad_norm": 0.0038706848863512278,
+      "learning_rate": 0.00016405931786981755,
+      "loss": 0.0001,
+      "step": 19
+    },
+    {
+      "epoch": 0.31746031746031744,
+      "grad_norm": 0.0038352019619196653,
+      "learning_rate": 0.00016002142805483685,
+      "loss": 0.0001,
+      "step": 20
+    },
+    {
+      "epoch": 0.3333333333333333,
+      "grad_norm": 0.0026093204505741596,
+      "learning_rate": 0.00015582437220268647,
+      "loss": 0.0001,
+      "step": 21
+    },
+    {
+      "epoch": 0.3492063492063492,
+      "grad_norm": 0.0022612179163843393,
+      "learning_rate": 0.0001514792801509831,
+      "loss": 0.0001,
+      "step": 22
+    },
+    {
+      "epoch": 0.36507936507936506,
+      "grad_norm": 0.0023677884601056576,
+      "learning_rate": 0.000146997674302732,
+      "loss": 0.0001,
+      "step": 23
+    },
+    {
+      "epoch": 0.38095238095238093,
+      "grad_norm": 0.0018812628695741296,
+      "learning_rate": 0.0001423914390709861,
+      "loss": 0.0,
+      "step": 24
+    },
+    {
+      "epoch": 0.3968253968253968,
+      "grad_norm": 0.0016407269285991788,
+      "learning_rate": 0.00013767278936351854,
+      "loss": 0.0,
+      "step": 25
+    },
+    {
+      "epoch": 0.4126984126984127,
+      "grad_norm": 0.001235563075169921,
+      "learning_rate": 0.0001328542381910835,
+      "loss": 0.0,
+      "step": 26
+    },
+    {
+      "epoch": 0.42857142857142855,
+      "grad_norm": 0.0011578642297536135,
+      "learning_rate": 0.00012794856348516095,
+      "loss": 0.0,
+      "step": 27
+    },
+    {
+      "epoch": 0.4444444444444444,
+      "grad_norm": 0.0008548243786208332,
+      "learning_rate": 0.0001229687742131796,
+      "loss": 0.0,
+      "step": 28
+    },
+    {
+      "epoch": 0.4603174603174603,
+      "grad_norm": 0.0008636576822027564,
+      "learning_rate": 0.00011792807588107357,
+      "loss": 0.0,
+      "step": 29
+    },
+    {
+      "epoch": 0.47619047619047616,
+      "grad_norm": 0.0006893647951073945,
+      "learning_rate": 0.00011283983551465511,
+      "loss": 0.0,
+      "step": 30
+    },
+    {
+      "epoch": 0.49206349206349204,
+      "grad_norm": 0.0006280859233811498,
+      "learning_rate": 0.00010771754621266466,
+      "loss": 0.0,
+      "step": 31
+    },
+    {
+      "epoch": 0.5079365079365079,
+      "grad_norm": 0.0005190623342059553,
+      "learning_rate": 0.00010257479136549889,
+      "loss": 0.0,
+      "step": 32
+    },
+    {
+      "epoch": 0.5238095238095238,
+      "grad_norm": 0.00044950845767743886,
+      "learning_rate": 9.742520863450115e-05,
+      "loss": 0.0,
+      "step": 33
+    },
+    {
+      "epoch": 0.5396825396825397,
+      "grad_norm": 0.0004545686533674598,
+      "learning_rate": 9.228245378733537e-05,
+      "loss": 0.0,
+      "step": 34
+    },
+    {
+      "epoch": 0.5555555555555556,
+      "grad_norm": 0.0004089027352165431,
+      "learning_rate": 8.71601644853449e-05,
+      "loss": 0.0,
+      "step": 35
+    },
+    {
+      "epoch": 0.5714285714285714,
+      "grad_norm": 0.00038734031841158867,
+      "learning_rate": 8.207192411892646e-05,
+      "loss": 0.0,
+      "step": 36
+    },
+    {
+      "epoch": 0.5873015873015873,
+      "grad_norm": 0.0003726118884515017,
+      "learning_rate": 7.703122578682046e-05,
+      "loss": 0.0,
+      "step": 37
+    },
+    {
+      "epoch": 0.6031746031746031,
+      "grad_norm": 0.0003228384302929044,
+      "learning_rate": 7.205143651483906e-05,
+      "loss": 0.0,
+      "step": 38
+    },
+    {
+      "epoch": 0.6190476190476191,
+      "grad_norm": 0.00027026349562220275,
+      "learning_rate": 6.714576180891654e-05,
+      "loss": 0.0,
+      "step": 39
+    },
+    {
+      "epoch": 0.6349206349206349,
+      "grad_norm": 0.0002839903172571212,
+      "learning_rate": 6.232721063648148e-05,
+      "loss": 0.0,
+      "step": 40
+    },
+    {
+      "epoch": 0.6507936507936508,
+      "grad_norm": 0.00029729033121839166,
+      "learning_rate": 5.7608560929013946e-05,
+      "loss": 0.0,
+      "step": 41
+    },
+    {
+      "epoch": 0.6666666666666666,
+      "grad_norm": 0.00026981416158378124,
+      "learning_rate": 5.300232569726804e-05,
+      "loss": 0.0,
+      "step": 42
+    },
+    {
+      "epoch": 0.6825396825396826,
+      "grad_norm": 0.000295968318823725,
+      "learning_rate": 4.852071984901696e-05,
+      "loss": 0.0,
+      "step": 43
+    },
+    {
+      "epoch": 0.6984126984126984,
+      "grad_norm": 0.0002813313330989331,
+      "learning_rate": 4.417562779731355e-05,
+      "loss": 0.0,
+      "step": 44
+    },
+    {
+      "epoch": 0.7142857142857143,
+      "grad_norm": 0.0002614279801491648,
+      "learning_rate": 3.997857194516319e-05,
+      "loss": 0.0,
+      "step": 45
+    },
+    {
+      "epoch": 0.7301587301587301,
+      "grad_norm": 0.0002446181606501341,
+      "learning_rate": 3.594068213018249e-05,
+      "loss": 0.0,
+      "step": 46
+    },
+    {
+      "epoch": 0.746031746031746,
+      "grad_norm": 0.0002518353867344558,
+      "learning_rate": 3.207266611027069e-05,
+      "loss": 0.0,
+      "step": 47
+    },
+    {
+      "epoch": 0.7619047619047619,
+      "grad_norm": 0.00023594856611452997,
+      "learning_rate": 2.8384781168560693e-05,
+      "loss": 0.0,
+      "step": 48
+    },
+    {
+      "epoch": 0.7777777777777778,
+      "grad_norm": 0.0002105052990373224,
+      "learning_rate": 2.4886806912948035e-05,
+      "loss": 0.0,
+      "step": 49
+    },
+    {
+      "epoch": 0.7936507936507936,
+      "grad_norm": 0.00019795401021838188,
+      "learning_rate": 2.1588019342328968e-05,
+      "loss": 0.0,
+      "step": 50
+    },
+    {
+      "epoch": 0.8095238095238095,
+      "grad_norm": 0.00026942399563267827,
+      "learning_rate": 1.8497166248318876e-05,
+      "loss": 0.0,
+      "step": 51
+    },
+    {
+      "epoch": 0.8253968253968254,
+      "grad_norm": 0.00023143402358982712,
+      "learning_rate": 1.562244401768144e-05,
+      "loss": 0.0,
+      "step": 52
+    },
+    {
+      "epoch": 0.8412698412698413,
+      "grad_norm": 0.0002526956086512655,
+      "learning_rate": 1.2971475896984475e-05,
+      "loss": 0.0,
+      "step": 53
+    },
+    {
+      "epoch": 0.8571428571428571,
+      "grad_norm": 0.00024234139709733427,
+      "learning_rate": 1.0551291777120464e-05,
+      "loss": 0.0,
+      "step": 54
+    },
+    {
+      "epoch": 0.873015873015873,
+      "grad_norm": 0.0002024932182393968,
+      "learning_rate": 8.368309551299536e-06,
+      "loss": 0.0,
+      "step": 55
+    },
+    {
+      "epoch": 0.8888888888888888,
+      "grad_norm": 0.00022990680008661002,
+      "learning_rate": 6.428318095950647e-06,
+      "loss": 0.0,
+      "step": 56
+    },
+    {
+      "epoch": 0.9047619047619048,
+      "grad_norm": 0.00024225791275966913,
+      "learning_rate": 4.7364619196617495e-06,
+      "loss": 0.0,
+      "step": 57
+    },
+    {
+      "epoch": 0.9206349206349206,
+      "grad_norm": 0.00024092527746688575,
+      "learning_rate": 3.2972275208679625e-06,
+      "loss": 0.0,
+      "step": 58
+    },
+    {
+      "epoch": 0.9365079365079365,
+      "grad_norm": 0.00023617202532477677,
+      "learning_rate": 2.1144314904642195e-06,
+      "loss": 0.0,
+      "step": 59
+    },
+    {
+      "epoch": 0.9523809523809523,
+      "grad_norm": 0.0002087904722429812,
+      "learning_rate": 1.1912103908922945e-06,
+      "loss": 0.0,
+      "step": 60
+    },
+    {
+      "epoch": 0.9682539682539683,
+      "grad_norm": 0.0002657146833371371,
+      "learning_rate": 5.300124385410943e-07,
+      "loss": 0.0,
+      "step": 61
+    },
+    {
+      "epoch": 0.9841269841269841,
+      "grad_norm": 0.00020013477478642017,
+      "learning_rate": 1.3259101151694708e-07,
+      "loss": 0.0,
+      "step": 62
+    },
+    {
+      "epoch": 1.0,
+      "grad_norm": 0.0002543162554502487,
+      "learning_rate": 0.0,
+      "loss": 0.0,
+      "step": 63
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 63,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 9894513868800000.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-63/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7f069be3f5585df80653cba1992bd799df88bfa372c2b77975a754e90e30ba77
+size 6456

config.json ADDED Viewed

	@@ -0,0 +1,47 @@

+{
+  "_attn_implementation_autoset": true,
+  "_name_or_path": "Austism/chronos-hermes-13b-v2",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "intermediate_size": 13824,
+  "max_position_embeddings": 4096,
+  "mlp_bias": false,
+  "model_type": "llama",
+  "num_attention_heads": 40,
+  "num_hidden_layers": 40,
+  "num_key_value_heads": 40,
+  "pad_token_id": 0,
+  "pretraining_tp": 1,
+  "quantization_config": {
+    "_load_in_4bit": false,
+    "_load_in_8bit": true,
+    "bnb_4bit_compute_dtype": "float32",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "fp4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": false,
+    "load_in_8bit": true,
+    "quant_method": "bitsandbytes"
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.49.0",
+  "use_cache": false,
+  "vocab_size": 32032
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+size 499723

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,50 @@

+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "legacy": false,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<unk>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}