---
license: openrail
datasets:
- s3nh/alpaca-dolly-instruction-only-polish
language:
- pl
- en
library_name: transformers
pipeline_tag: text-generation
---

Fine-tuned state-spaces/mamba-2.8b on the s3nh/alpaca-dolly-instruction-only-polish instruction dataset.

The mamba_ssm package is required to run inference with this model:

```
pip install mamba_ssm
```

A more detailed explanation will follow soon.
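
Below is a minimal inference sketch, not part of the original card: it assumes the mamba_ssm loading path (MambaLMHeadModel) together with the EleutherAI/gpt-neox-20b tokenizer named in the Axolotl config, and an Alpaca-style prompt implied by the `type: alpaca` dataset setting; the repo id is a placeholder.

```
import torch
from transformers import AutoTokenizer
from mamba_ssm.models.mixer_seq_simple import MambaLMHeadModel

# Tokenizer taken from the Axolotl config below (tokenizer_config: EleutherAI/gpt-neox-20b).
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")

# Placeholder repo id: point this at the actual checkpoint of this fine-tune.
model = MambaLMHeadModel.from_pretrained(
    "s3nh/mamba-2.8b-instruct-pl", device="cuda", dtype=torch.bfloat16
)

# Alpaca-style prompt, assumed from the `type: alpaca` dataset setting.
prompt = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    "### Instruction:\nNapisz krótki wiersz o wiośnie.\n\n"
    "### Response:\n"
)
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")

# mamba_ssm's generate returns the full token sequence (prompt included);
# top_k must be set, since the default top_k=1 short-circuits to greedy decoding.
out = model.generate(
    input_ids=input_ids,
    max_length=256,
    temperature=0.7,
    top_k=40,
    top_p=0.9,
)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```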

Axolotl config

```
base_model: state-spaces/mamba-2.8b
model_type: MambaLMHeadModel
tokenizer_type: AutoTokenizer
tokenizer_config: EleutherAI/gpt-neox-20b

load_in_8bit: false
load_in_4bit: false
strict: false

datasets:
  - path: s3nh/alpaca-dolly-instruction-only-polish
    type: alpaca
dataset_prepared_path:
val_set_size: 0.0
output_dir: ./mamba

sequence_len: 1024
sample_packing: false
pad_to_sequence_len: false

wandb_project:
wandb_entity:
wandb_watch:
wandb_name:
wandb_log_model:

gradient_accumulation_steps: 4
micro_batch_size: 1
num_epochs: 2
optimizer: paged_adamw_8bit
lr_scheduler: cosine
learning_rate: 5e-5

train_on_inputs: false
group_by_length: true

bf16: true
fp16: false
tf32: true
save_strategy: steps
gradient_checkpointing: false
early_stopping_patience:
resume_from_checkpoint: true
local_rank:
logging_steps: 100
xformers_attention:
flash_attention:

warmup_steps: 10
evals_per_epoch: 2
eval_table_size:
eval_table_max_new_tokens: 128
saves_per_epoch:
save_steps: 3000
debug:
deepspeed:
weight_decay: 0.0
fsdp:
fsdp_config:
special_tokens:
tokens:
save_safetensors: False
```
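
Assuming a standard Axolotl installation, a config like this is typically launched with `accelerate launch -m axolotl.cli.train config.yml`; that command reflects general Axolotl usage and is not stated in the original card.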