winglian committed on
Commit
ba226f7
1 Parent(s): 5f4849c
configs/mistral-7b-oo-phase1.yml ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: mistralai/Mistral-7B-v0.1
2
+ base_model_config: mistralai/Mistral-7B-v0.1
3
+ model_type: MistralForCausalLM
4
+ tokenizer_type: LlamaTokenizer
5
+ is_mistral_derived_model: true
6
+
7
+ load_in_8bit: false
8
+ load_in_4bit: false
9
+ strict: false
10
+
11
+ datasets:
12
+ - path: openaccess-ai-collective/oo-gpt4-filtered
13
+ type: alpaca_w_system.load_open_orca_chatml
14
+ data_files:
15
+ - 1M-GPT4-Augmented-filtered-gt10.parquet
16
+
17
+ dataset_prepared_path: last_run_prepared
18
+ val_set_size: 0.005
19
+ output_dir: ./mistral-oo
20
+
21
+ sequence_len: 4096
22
+ sample_packing: true
23
+ pad_to_sequence_len: true
24
+
25
+ wandb_project: mistral-oo-7b
26
+ wandb_entity:
27
+ wandb_watch:
28
+ wandb_run_id:
29
+ wandb_log_model:
30
+
31
+ gradient_accumulation_steps: 1
32
+ micro_batch_size: 6
33
+ num_epochs: 4
34
+ optimizer: adamw_torch
35
+ adam_beta2: 0.95
36
+ adam_epsilon: 0.00001
37
+ max_grad_norm: 1.0
38
+ lr_scheduler: cosine
39
+ learning_rate: 0.000006
40
+
41
+ train_on_inputs: false
42
+ group_by_length: false
43
+ bf16: true
44
+ fp16: false
45
+ tf32: false
46
+
47
+ gradient_checkpointing: true
48
+ early_stopping_patience:
49
+ resume_from_checkpoint:
50
+ local_rank:
51
+ logging_steps: 1
52
+ xformers_attention:
53
+ flash_attention: true
54
+
55
+ warmup_steps: 100
56
+ eval_steps: 0.05
57
+ eval_table_size:
58
+ eval_table_max_new_tokens:
59
+ save_steps:
60
+ debug:
61
+ deepspeed:
62
+ weight_decay: 0.1
63
+ fsdp:
64
+ fsdp_config:
65
+ special_tokens:
66
+ bos_token: "<s>"
67
+ eos_token: "<|im_end|>"
68
+ unk_token: "<unk>"
69
+ tokens:
70
+ - "<|im_start|>"
71
+ - "<|im_end|>"
pytorch_model-00001-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8e6cd0ab7ee0e68c7e8019dbb89ea25bbce57837a35a1e41402838ce11d32d6b
3
  size 9943044428
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4aaee725fafdf4c88baa1c0d2900b249c1bcf6716af55e610e75c50bfaa9eb4c
3
  size 9943044428
pytorch_model-00002-of-00002.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47288e5bb620c13adecf6ac4228c1fa35cdc168e69d1b9199833290cca2adda9
3
  size 4540552031
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec039d944707a7e6eb7d1453861143a3ea99d657b52b7d9a0aee2de734d9075f
3
  size 4540552031