diabolic6045 committed · Commit ec25aaa · 1 parent: bac728e

End of training

Files changed:
- README.md (+14 −22)
- adapter_model.bin (+1 −1)
README.md CHANGED

@@ -1,21 +1,19 @@
 ---
-
+base_model: meta-llama/Meta-Llama-3-8B
 library_name: peft
+license: llama3
 tags:
 - axolotl
 - generated_from_trainer
-base_model: meta-llama/Meta-Llama-3-8B
 model-index:
 - name: Sanskrit-llama
   results: []
-datasets:
-- diabolic6045/Sanskrit-llama
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-[<img src="https://raw.githubusercontent.com/
+[<img src="https://raw.githubusercontent.com/axolotl-ai-cloud/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/axolotl-ai-cloud/axolotl)
 <details><summary>See axolotl config</summary>
 
 axolotl version: `0.4.1`
@@ -24,7 +22,7 @@ axolotl version: `0.4.1`
 base_model: meta-llama/Meta-Llama-3-8B
 model_type: AutoModelForCausalLM
 tokenizer_type: AutoTokenizer
-max_steps:
+max_steps:
 bnb_config_kwargs:
   llm_int8_has_fp16_weight: false
   bnb_4bit_quant_type: nf4
@@ -35,7 +33,7 @@ load_in_4bit: true
 strict: false
 
 datasets:
-  - path:
+  - path: VinitT/Sanskrit-Llama_Base-Dataset
     type: alpaca
 dataset_prepared_path:
 val_set_size: 0
@@ -46,7 +44,7 @@ hf_use_auth_token: true
 adapter: qlora
 lora_model_dir:
 
-sequence_len:
+sequence_len: 512
 sample_packing: true
 eval_sample_packing: false
 pad_to_sequence_len: true
@@ -56,21 +54,15 @@ lora_alpha: 16
 lora_dropout: 0.05
 lora_target_modules:
 lora_target_linear: true
-lora_fan_in_fan_out:
-
-wandb_project: संस्कृतम्-llama
-wandb_entity:
-wandb_watch: all
-wandb_name: संस्कृतम्-llama
-wandb_log_model:
+lora_fan_in_fan_out:
 
-gradient_accumulation_steps:
+gradient_accumulation_steps: 8
 micro_batch_size: 2
 num_epochs: 1
 optimizer: paged_adamw_8bit
 lr_scheduler: cosine
 cosine_min_lr_ratio: 0.2
-learning_rate:
+learning_rate: 5e-5
 
 train_on_inputs: false
 group_by_length: false
@@ -132,19 +124,19 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate:
+- learning_rate: 5e-05
 - train_batch_size: 2
 - eval_batch_size: 2
 - seed: 42
 - distributed_type: multi-GPU
 - num_devices: 2
-- gradient_accumulation_steps:
-- total_train_batch_size:
+- gradient_accumulation_steps: 8
+- total_train_batch_size: 32
 - total_eval_batch_size: 4
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
--
+- num_epochs: 1
 
 ### Training results
 
@@ -153,7 +145,7 @@ The following hyperparameters were used during training:
 ### Framework versions
 
 - PEFT 0.11.1
-- Transformers 4.
+- Transformers 4.42.3
 - Pytorch 2.1.2
 - Datasets 2.19.1
 - Tokenizers 0.19.1
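For context on the updated hyperparameters: the reported total_train_batch_size of 32 follows from micro_batch_size × gradient_accumulation_steps × num_devices = 2 × 8 × 2 = 32. Below is a minimal sketch of how the resulting QLoRA adapter could be loaded on top of the base model with PEFT; the adapter repo id `diabolic6045/Sanskrit-llama` is inferred from the model name in this card and is an assumption, as is the prompt, and real inputs should follow the Alpaca prompt template used during training.

```python
# Minimal sketch (assumption: the adapter is published as diabolic6045/Sanskrit-llama).
# Loads meta-llama/Meta-Llama-3-8B in 4-bit NF4, matching the axolotl config above,
# then attaches the trained QLoRA adapter with PEFT.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel

base_id = "meta-llama/Meta-Llama-3-8B"
adapter_id = "diabolic6045/Sanskrit-llama"  # hypothetical repo id for this adapter

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",            # nf4 quantization, as in the config
    bnb_4bit_compute_dtype=torch.bfloat16,
)

tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb_config, device_map="auto"
)
model = PeftModel.from_pretrained(model, adapter_id)  # apply the LoRA weights

prompt = "संस्कृतभाषायां वाक्यं लिखतु।"  # example prompt: "Write a sentence in Sanskrit."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```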
adapter_model.bin CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:d59031ba061534bc251ce97171f3d11833bed818b498853a2f16aa29c16509f7
 size 167843194
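The adapter_model.bin entry is a Git LFS pointer file: only the `oid sha256:` line changed, so the binary adapter weights were replaced while the size stayed at 167843194 bytes. A small sketch for checking a downloaded copy against the pointer's hash (the local file path is an assumption):

```python
# Sketch: verify a downloaded adapter_model.bin against the LFS pointer's SHA-256.
# "adapter_model.bin" is assumed to be the locally downloaded file path.
import hashlib

expected = "d59031ba061534bc251ce97171f3d11833bed818b498853a2f16aa29c16509f7"

h = hashlib.sha256()
with open("adapter_model.bin", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        h.update(chunk)

print("hash OK" if h.hexdigest() == expected else "hash mismatch", h.hexdigest())
```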