practicaldreamer committed on
Commit 530a9a0
1 Parent(s): 6a44f40
adapter_config.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "base_model_name_or_path": "Neko-Institute-of-Science/LLaMA-30B-HF",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "lora_alpha": 128,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "target_modules": [
+     "q_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
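The config above describes a rank-64 LoRA (alpha 128, dropout 0.05) on the q_proj and v_proj attention projections of LLaMA-30B, saved with inference mode enabled. A minimal loading sketch using the PEFT API, assuming fp16 on GPU; "path/to/adapter" is a placeholder for the directory containing adapter_config.json and adapter_model.bin:

```python
# Minimal sketch: load this LoRA adapter on top of the base model with PEFT.
# "path/to/adapter" is a placeholder for this repo's root directory.
import torch
from transformers import LlamaForCausalLM
from peft import PeftModel

base = LlamaForCausalLM.from_pretrained(
    "Neko-Institute-of-Science/LLaMA-30B-HF",
    torch_dtype=torch.float16,  # fp16 is an assumption; the run itself trained in 8-bit
    device_map="auto",
)
model = PeftModel.from_pretrained(base, "path/to/adapter")
model.eval()  # matches "inference_mode": true in the config
```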
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48eca6e0a8ca2ec993e3bc7396f9fe1c06fa72915006f31e72592cf3b81f16ad
+ size 409031373
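As with all binaries in this commit, adapter_model.bin is committed as a Git LFS pointer: the three lines above record the spec version, the SHA-256 object id, and the byte size of the real file. A small sketch (the helper and file names are illustrative, not part of this repo) that verifies a downloaded object against such a pointer:

```python
# Sketch: check a downloaded file against its Git LFS pointer.
# The file names here are illustrative placeholders.
import hashlib

def parse_pointer(text: str) -> dict:
    # Each pointer line is "key value": version, oid, size.
    return dict(line.split(" ", 1) for line in text.strip().splitlines())

pointer = parse_pointer(open("adapter_model.bin.pointer").read())
expected_oid = pointer["oid"].removeprefix("sha256:")
expected_size = int(pointer["size"])

data = open("adapter_model.bin", "rb").read()  # ~409 MB; fine to read at once here
assert len(data) == expected_size, "size mismatch"
assert hashlib.sha256(data).hexdigest() == expected_oid, "hash mismatch"
```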
checkpoint-36/adapter_config.json ADDED
@@ -0,0 +1,17 @@
+ {
+   "base_model_name_or_path": "Neko-Institute-of-Science/LLaMA-30B-HF",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "lora_alpha": 128,
+   "lora_dropout": 0.05,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "target_modules": [
+     "q_proj",
+     "v_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
checkpoint-36/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48eca6e0a8ca2ec993e3bc7396f9fe1c06fa72915006f31e72592cf3b81f16ad
+ size 409031373
checkpoint-36/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c4e68ef518f7c73a60e165ddc32b1e55569a2678dc88a9eba216124114ffc3f5
+ size 205153925
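These sizes are consistent with the LoRA geometry: assuming LLaMA-30B's 60 layers and hidden size 6656 (properties of the base model, not stated in this diff), rank-64 adapters on q_proj and v_proj give about 102M trainable parameters, which is roughly 409 MB in fp32 and roughly 204 MB for the two 1-byte states of the 8-bit AdamW optimizer used in the run. A back-of-the-envelope check:

```python
# Sketch: rough size check for adapter_model.bin and optimizer.pt.
# Layer count and hidden size are assumed LLaMA-30B values, not from this repo.
n_layers, hidden, r = 60, 6656, 64
per_module = 2 * r * hidden              # lora_A (r x h) + lora_B (h x r)
lora_params = per_module * 2 * n_layers  # q_proj and v_proj in every layer

print(lora_params)      # 102236160
print(lora_params * 4)  # 408944640 bytes, close to adapter_model.bin's 409031373
print(lora_params * 2)  # 204472320 bytes, close to optimizer.pt's 205153925
                        # (adamw_bnb_8bit keeps two 1-byte states per parameter)
```

The small remainders over the predicted sizes are plausibly serialization metadata.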
checkpoint-36/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:47253551939bd748a3719d1c09bdc491a07c56dbaef6f75e6b7464039329022c
+ size 14575
checkpoint-36/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b280ae8d5a93531d2378b82c4312f067d6f81420714361b4c45009a9f2adfca1
+ size 627
checkpoint-36/trainer_state.json ADDED
@@ -0,0 +1,304 @@
+ {
+   "best_metric": 1.093778133392334,
+   "best_model_checkpoint": "output_dir/checkpoint-36",
+   "epoch": 1.083725305738476,
+   "global_step": 36,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.03,
+       "learning_rate": 6.000000000000001e-07,
+       "loss": 1.1282,
+       "step": 1
+     },
+     {
+       "epoch": 0.06,
+       "learning_rate": 1.2000000000000002e-06,
+       "loss": 1.1856,
+       "step": 2
+     },
+     {
+       "epoch": 0.09,
+       "learning_rate": 1.8e-06,
+       "loss": 1.1748,
+       "step": 3
+     },
+     {
+       "epoch": 0.12,
+       "learning_rate": 2.4000000000000003e-06,
+       "loss": 1.1748,
+       "step": 4
+     },
+     {
+       "epoch": 0.12,
+       "eval_loss": 1.1107146739959717,
+       "eval_runtime": 9.0527,
+       "eval_samples_per_second": 1.215,
+       "eval_steps_per_second": 0.221,
+       "step": 4
+     },
+     {
+       "epoch": 0.15,
+       "learning_rate": 3e-06,
+       "loss": 1.1506,
+       "step": 5
+     },
+     {
+       "epoch": 0.18,
+       "learning_rate": 3.6e-06,
+       "loss": 1.1282,
+       "step": 6
+     },
+     {
+       "epoch": 0.21,
+       "learning_rate": 4.2000000000000004e-06,
+       "loss": 1.1833,
+       "step": 7
+     },
+     {
+       "epoch": 0.24,
+       "learning_rate": 4.800000000000001e-06,
+       "loss": 1.1521,
+       "step": 8
+     },
+     {
+       "epoch": 0.24,
+       "eval_loss": 1.1096446514129639,
+       "eval_runtime": 9.0523,
+       "eval_samples_per_second": 1.215,
+       "eval_steps_per_second": 0.221,
+       "step": 8
+     },
+     {
+       "epoch": 0.27,
+       "learning_rate": 5.4e-06,
+       "loss": 1.1579,
+       "step": 9
+     },
+     {
+       "epoch": 0.3,
+       "learning_rate": 6e-06,
+       "loss": 1.1198,
+       "step": 10
+     },
+     {
+       "epoch": 0.33,
+       "learning_rate": 6.6e-06,
+       "loss": 1.2155,
+       "step": 11
+     },
+     {
+       "epoch": 0.36,
+       "learning_rate": 7.2e-06,
+       "loss": 1.1188,
+       "step": 12
+     },
+     {
+       "epoch": 0.36,
+       "eval_loss": 1.1087640523910522,
+       "eval_runtime": 9.0422,
+       "eval_samples_per_second": 1.217,
+       "eval_steps_per_second": 0.221,
+       "step": 12
+     },
+     {
+       "epoch": 0.39,
+       "learning_rate": 7.8e-06,
+       "loss": 1.1327,
+       "step": 13
+     },
+     {
+       "epoch": 0.42,
+       "learning_rate": 8.400000000000001e-06,
+       "loss": 1.1311,
+       "step": 14
+     },
+     {
+       "epoch": 0.45,
+       "learning_rate": 9e-06,
+       "loss": 1.1268,
+       "step": 15
+     },
+     {
+       "epoch": 0.48,
+       "learning_rate": 9.600000000000001e-06,
+       "loss": 1.1316,
+       "step": 16
+     },
+     {
+       "epoch": 0.48,
+       "eval_loss": 1.1073920726776123,
+       "eval_runtime": 9.0556,
+       "eval_samples_per_second": 1.215,
+       "eval_steps_per_second": 0.221,
+       "step": 16
+     },
+     {
+       "epoch": 0.51,
+       "learning_rate": 1.02e-05,
+       "loss": 1.1142,
+       "step": 17
+     },
+     {
+       "epoch": 0.54,
+       "learning_rate": 1.08e-05,
+       "loss": 1.1369,
+       "step": 18
+     },
+     {
+       "epoch": 0.57,
+       "learning_rate": 1.1400000000000001e-05,
+       "loss": 1.139,
+       "step": 19
+     },
+     {
+       "epoch": 0.6,
+       "learning_rate": 1.2e-05,
+       "loss": 1.1231,
+       "step": 20
+     },
+     {
+       "epoch": 0.6,
+       "eval_loss": 1.1051356792449951,
+       "eval_runtime": 9.0525,
+       "eval_samples_per_second": 1.215,
+       "eval_steps_per_second": 0.221,
+       "step": 20
+     },
+     {
+       "epoch": 0.63,
+       "learning_rate": 1.26e-05,
+       "loss": 1.1243,
+       "step": 21
+     },
+     {
+       "epoch": 0.66,
+       "learning_rate": 1.32e-05,
+       "loss": 1.1161,
+       "step": 22
+     },
+     {
+       "epoch": 0.69,
+       "learning_rate": 1.3800000000000002e-05,
+       "loss": 1.1153,
+       "step": 23
+     },
+     {
+       "epoch": 0.72,
+       "learning_rate": 1.44e-05,
+       "loss": 1.1217,
+       "step": 24
+     },
+     {
+       "epoch": 0.72,
+       "eval_loss": 1.1019339561462402,
+       "eval_runtime": 9.0337,
+       "eval_samples_per_second": 1.218,
+       "eval_steps_per_second": 0.221,
+       "step": 24
+     },
+     {
+       "epoch": 0.75,
+       "learning_rate": 1.5e-05,
+       "loss": 1.1115,
+       "step": 25
+     },
+     {
+       "epoch": 0.78,
+       "learning_rate": 1.56e-05,
+       "loss": 1.1215,
+       "step": 26
+     },
+     {
+       "epoch": 0.81,
+       "learning_rate": 1.62e-05,
+       "loss": 1.1057,
+       "step": 27
+     },
+     {
+       "epoch": 0.84,
+       "learning_rate": 1.6800000000000002e-05,
+       "loss": 1.1184,
+       "step": 28
+     },
+     {
+       "epoch": 0.84,
+       "eval_loss": 1.0975638628005981,
+       "eval_runtime": 9.0363,
+       "eval_samples_per_second": 1.217,
+       "eval_steps_per_second": 0.221,
+       "step": 28
+     },
+     {
+       "epoch": 0.87,
+       "learning_rate": 1.74e-05,
+       "loss": 1.117,
+       "step": 29
+     },
+     {
+       "epoch": 0.9,
+       "learning_rate": 1.8e-05,
+       "loss": 1.1143,
+       "step": 30
+     },
+     {
+       "epoch": 0.93,
+       "learning_rate": 1.86e-05,
+       "loss": 1.1195,
+       "step": 31
+     },
+     {
+       "epoch": 0.96,
+       "learning_rate": 1.9200000000000003e-05,
+       "loss": 1.1062,
+       "step": 32
+     },
+     {
+       "epoch": 0.96,
+       "eval_loss": 1.0937966108322144,
+       "eval_runtime": 9.0438,
+       "eval_samples_per_second": 1.216,
+       "eval_steps_per_second": 0.221,
+       "step": 32
+     },
+     {
+       "epoch": 0.99,
+       "learning_rate": 1.98e-05,
+       "loss": 1.1314,
+       "step": 33
+     },
+     {
+       "epoch": 1.02,
+       "learning_rate": 2.04e-05,
+       "loss": 1.1049,
+       "step": 34
+     },
+     {
+       "epoch": 1.05,
+       "learning_rate": 2.1e-05,
+       "loss": 1.1067,
+       "step": 35
+     },
+     {
+       "epoch": 1.08,
+       "learning_rate": 2.16e-05,
+       "loss": 1.1027,
+       "step": 36
+     },
+     {
+       "epoch": 1.08,
+       "eval_loss": 1.093778133392334,
+       "eval_runtime": 9.0523,
+       "eval_samples_per_second": 1.215,
+       "eval_steps_per_second": 0.221,
+       "step": 36
+     }
+   ],
+   "max_steps": 99,
+   "num_train_epochs": 3,
+   "total_flos": 1.8344233538578022e+18,
+   "trial_name": null,
+   "trial_params": null
+ }
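The logged learning rates are still in the warmup ramp: with the configured peak of 3e-5 over 50 warmup steps (see documentation/hyperparameters.yml below), the rate grows by 6e-7 per step, matching step 1 at 6e-7 through step 36 at 2.16e-5, and the best checkpoint is the last eval at step 36. A quick consistency check:

```python
# Sketch: the logged learning rates match linear warmup to 3e-5 over 50 steps.
peak_lr, warmup_steps = 3e-5, 50
for step, logged in [(1, 6.000000000000001e-07), (20, 1.2e-05), (36, 2.16e-05)]:
    expected = peak_lr * step / warmup_steps  # 6e-7 per step
    assert abs(expected - logged) < 1e-12, (step, expected, logged)
```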
checkpoint-36/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5b21bf743ecaa5f9de425c65a9bb817e2a0607a8a178b081ea0acfe15e633fff
+ size 3963
documentation/hyperparameters.yml ADDED
@@ -0,0 +1,69 @@
+ base_model: Neko-Institute-of-Science/LLaMA-30B-HF
+ base_model_config: Neko-Institute-of-Science/LLaMA-30B-HF
+ model_type: LlamaForCausalLM
+ tokenizer_type: LlamaTokenizer
+ load_in_8bit: true
+ load_4bit:
+ datasets:
+   - path: practicaldreamer/RPGPT_PublicDomain-ShareGPT
+     data_files: RPGPT_PublicDomain_v3-sharegpt.json
+     type: sharegpt
+ dataset_prepared_path: data/last_run_prepared
+ val_set_size: 0.0025
+ adapter: lora
+ lora_model_dir:
+ sequence_len: 2048
+ max_packed_sequence_len:
+ lora_r: 64
+ lora_alpha: 128
+ lora_dropout: 0.05
+ lora_target_modules:
+   - q_proj
+   - v_proj
+   # - k_proj
+   # - o_proj
+ lora_fan_in_fan_out: false
+ wandb_project:
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model: checkpoint
+ output_dir: output_dir
+ batch_size: 128
+ micro_batch_size: 4
+ eval_batch_size: 1
+ num_epochs: 1
+ warmup_steps: 50
+ logging_steps:
+ learning_rate: 0.00003
+ optimizer: adamw_bnb_8bit
+ torchdistx_path:
+ lr_scheduler: cosine
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ tf32: true
+ gradient_checkpointing: true
+ early_stopping_patience: 3
+ resume_from_checkpoint:
+ auto_resume_from_checkpoints:
+ local_rank:
+ xformers_attention: true
+ flash_attention:
+ gptq_groupsize:
+ gptq_model_v1:
+ save_steps: 4
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ fsdp_transformer_layer_cls_to_wrap:
+ fsdp_min_num_params: 2000
+ fsdp_backward_prefetch:
+   - backward_pre
+ limit_all_gathers: false
+ special_tokens:
+   pad_token: "[PAD]"
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
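Two derived numbers tie this config to the trainer state above: batch_size 128 with micro_batch_size 4 implies 32 gradient-accumulation micro-steps per optimizer update (assuming a single GPU, which the single-process flags in trainer_state.json suggest), and trainer_state.json's max_steps 99 over num_train_epochs 3 gives 33 updates per epoch, matching the logged epoch increment of about 0.03 per step:

```python
# Sketch: batch and step arithmetic implied by the config and trainer state.
batch_size, micro_batch_size = 128, 4
grad_accum = batch_size // micro_batch_size   # 32 micro-batches per update

max_steps, num_train_epochs = 99, 3           # from checkpoint-36/trainer_state.json
steps_per_epoch = max_steps / num_train_epochs
print(grad_accum, steps_per_epoch, 1 / steps_per_epoch)  # 32 33.0 0.0303...
```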
documentation/preprocessed_sample.txt ADDED
The diff for this file is too large to render. See raw diff
 
documentation/requirements.txt ADDED
@@ -0,0 +1,92 @@
+ accelerate @ git+https://github.com/huggingface/accelerate.git@24ae624d96866e3f993a13fc341ea0dcb68b1470
+ aiohttp==3.8.4
+ aiosignal==1.3.1
+ alpaca-lora-4bit @ git+https://github.com/winglian/alpaca_lora_4bit.git@1b4a376ea816eb2417404b4d1ac27fa16471588a
+ appdirs==1.4.4
+ async-timeout==4.0.2
+ attrdict==2.0.1
+ attrs==23.1.0
+ -e git+https://github.com/winglian/axolotl@a10a8265efde4ec61037560e3b8e2e31dab984af#egg=axolotl
+ bitsandbytes==0.37.2
+ black==23.3.0
+ certifi==2022.12.7
+ charset-normalizer==3.1.0
+ click==8.1.3
+ cmake==3.26.3
+ colorama==0.4.6
+ datasets==2.12.0
+ deepspeed==0.9.4
+ dill==0.3.6
+ docker-pycreds==0.4.0
+ einops==0.6.1
+ filelock==3.12.0
+ fire==0.5.0
+ flash-attn==1.0.4
+ frozenlist==1.3.3
+ fsspec==2023.4.0
+ gitdb==4.0.10
+ GitPython==3.1.31
+ hjson==3.1.0
+ huggingface-hub==0.14.1
+ idna==3.4
+ Jinja2==3.1.2
+ lit==16.0.2
+ MarkupSafe==2.1.2
+ mpmath==1.3.0
+ multidict==6.0.4
+ multiprocess==0.70.14
+ mypy-extensions==1.0.0
+ networkx==3.1
+ ninja==1.11.1
+ numpy==1.24.3
+ nvidia-cublas-cu11==11.10.3.66
+ nvidia-cuda-cupti-cu11==11.7.101
+ nvidia-cuda-nvrtc-cu11==11.7.99
+ nvidia-cuda-runtime-cu11==11.7.99
+ nvidia-cudnn-cu11==8.5.0.96
+ nvidia-cufft-cu11==10.9.0.58
+ nvidia-curand-cu11==10.2.10.91
+ nvidia-cusolver-cu11==11.4.0.1
+ nvidia-cusparse-cu11==11.7.4.91
+ nvidia-nccl-cu11==2.14.3
+ nvidia-nvtx-cu11==11.7.91
+ packaging==23.1
+ pandas==2.0.1
+ pathspec==0.11.1
+ pathtools==0.1.2
+ peft @ git+https://github.com/huggingface/peft.git@70af02a2bca5a63921790036b2c9430edf4037e2
+ platformdirs==3.5.0
+ protobuf==4.22.4
+ psutil==5.9.5
+ py-cpuinfo==9.0.0
+ pyarrow==12.0.0
+ pydantic==1.10.7
+ pyre-extensions==0.0.29
+ python-dateutil==2.8.2
+ pytz==2023.3
+ PyYAML==6.0
+ regex==2023.5.5
+ requests==2.30.0
+ responses==0.18.0
+ safetensors==0.3.1
+ sentencepiece==0.1.99
+ sentry-sdk==1.21.1
+ setproctitle==1.3.2
+ six==1.16.0
+ smmap==5.0.0
+ sympy==1.11.1
+ termcolor==2.3.0
+ tokenizers==0.13.3
+ tomli==2.0.1
+ torch==2.0.0
+ tqdm==4.65.0
+ transformers @ git+https://github.com/huggingface/transformers.git@799df10aef3abfe6158c83daf0a9eacf8f6f0a1f
+ triton==2.0.0
+ typing-inspect==0.8.0
+ typing_extensions==4.5.0
+ tzdata==2023.3
+ urllib3==2.0.2
+ wandb==0.15.4
+ xformers==0.0.19
+ xxhash==3.2.0
+ yarl==1.9.2
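Several entries are pinned to exact git commits (accelerate, alpaca-lora-4bit, axolotl, peft, transformers), so the environment should be reproduced from this file rather than by upgrading packages individually. A small sketch that spot-checks a few installed versions against the pins:

```python
# Sketch: spot-check a few installed versions against the pins above.
from importlib.metadata import version

pins = {"torch": "2.0.0", "bitsandbytes": "0.37.2", "xformers": "0.0.19"}
for name, pinned in pins.items():
    installed = version(name)
    flag = "ok" if installed == pinned else f"MISMATCH (pinned {pinned})"
    print(f"{name}=={installed}  {flag}")
```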
documentation/wandb.info ADDED
@@ -0,0 +1 @@
+ https://wandb.ai/practicaldreamer/rpgpt/runs/d4gsi8vy