Vokturz committed on
Commit
2157f81
1 Parent(s): b6155f0

Model save

Files changed (3)
  1. README.md +143 -0
  2. generation_config.json +6 -0
  3. model.safetensors +1 -1
README.md ADDED
@@ -0,0 +1,143 @@
+ ---
+ license: other
+ base_model: deepseek-ai/deepseek-coder-1.3b-base
+ tags:
+ - generated_from_trainer
+ model-index:
+ - name: deepseek_coder_1.3b_typescript
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ [<img src="https://raw.githubusercontent.com/OpenAccess-AI-Collective/axolotl/main/image/axolotl-badge-web.png" alt="Built with Axolotl" width="200" height="32"/>](https://github.com/OpenAccess-AI-Collective/axolotl)
+ <details><summary>See axolotl config</summary>
+
+ axolotl version: `0.3.0`
+ ```yaml
+ base_model: deepseek-ai/deepseek-coder-1.3b-base
+ model_type: AutoModelForCausalLM
+ trust_remote_code: true
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: false
+
+
+ datasets:
+   - path: CodeGPTPlus/typescript-0-500000-seq1024
+     type: completion
+     field: text
+ #dataset_prepared_path:
+
+ #pretraining_dataset: CodeGPTPlus/typescript-0-500000-seq1024
+
+ val_set_size: 0.001
+ output_dir: ./fft-out
+
+ sequence_len: 1024
+
+ adapter:
+ lora_model_dir:
+ lora_r:
+ lora_alpha:
+ lora_dropout:
+ lora_target_linear:
+ lora_fan_in_fan_out:
+ lora_modules_to_save:
+
+ wandb_project: deepseek_1.3_fft
+ wandb_entity:
+ wandb_watch:
+ wandb_name: aws_a10g
+ wandb_log_model: end
+
+
+ gradient_accumulation_steps: 2
+ micro_batch_size: 20
+ num_epochs: 1
+ #max_steps: 1 # REMOVE IT
+ optimizer: adamw_bnb_8bit
+ adam_beta1: 0.9
+ adam_beta2: 0.999
+ adam_epsilon: 0.000001
+ max_grad_norm: 1.0
+ weight_decay: 0.1
+ lr_scheduler: cosine
+ learning_rate: 0.00002
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: false
+ gradient_checkpointing: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention:
+ flash_attention: true
+
+ loss_watchdog_threshold: 5.0
+ loss_watchdog_patience: 3
+
+ hub_model_id: CodeGPTPlus/deepseek_coder_1.3b_typescript
+ hub_strategy: every_save
+ warmup_ratio: 0.01
+ evals_per_epoch: 20
+ saves_per_epoch: 3
+ debug:
+ deepspeed:
+
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<|begin▁of▁sentence|>"
+   eos_token: "<|end▁of▁sentence|>"
+   pad_token: "<|end▁of▁sentence|>"
+   # fim_prefix: "<|fim▁begin|>"
+   # fim_middle: "<|fim▁hole|>"
+   # fim_suffix: "<|fim▁end|>"
+
+ ```
+
+ </details><br>
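The `datasets` entry in the config above points at a plain-text completion corpus whose samples live in a `text` field. As a rough, hedged sketch of what that setup consumes (assuming the dataset is public on the Hub and exposes a `train` split), one could inspect a sample like this:

```python
# Hedged sketch: inspect the completion-format corpus named in the axolotl config.
# Assumes CodeGPTPlus/typescript-0-500000-seq1024 is reachable on the Hub, has a
# "train" split, and keeps the `text` column referenced by `field: text`.
from datasets import load_dataset

ds = load_dataset("CodeGPTPlus/typescript-0-500000-seq1024", split="train")
print(ds)                    # row count and column names
print(ds[0]["text"][:500])   # first 500 characters of one TypeScript sample
```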
+
+ # deepseek_coder_1.3b_typescript
+
+ This model is a fine-tuned version of [deepseek-ai/deepseek-coder-1.3b-base](https://huggingface.co/deepseek-ai/deepseek-coder-1.3b-base) on the CodeGPTPlus/typescript-0-500000-seq1024 dataset (see the axolotl config above).
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ The model was trained on CodeGPTPlus/typescript-0-500000-seq1024, with 0.1% of the data held out for evaluation (`val_set_size: 0.001` in the config above).
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 2e-05
+ - train_batch_size: 20
+ - eval_batch_size: 20
+ - seed: 42
+ - gradient_accumulation_steps: 2
+ - total_train_batch_size: 40
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-06
+ - lr_scheduler_type: cosine
+ - lr_scheduler_warmup_steps: 261
+ - num_epochs: 1
+
+ ### Framework versions
+
+ - Transformers 4.37.0.dev0
+ - Pytorch 2.0.1+cu118
+ - Datasets 2.16.1
+ - Tokenizers 0.15.0
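The card above is still largely a stub, so the following is only a sketch of how this checkpoint might be used for TypeScript infilling. It assumes the published model keeps deepseek-coder's fill-in-the-middle tokens, which appear (commented out) in the `special_tokens` block of the training config; the model id comes from `hub_model_id`.

```python
# Hedged sketch: fill-in-the-middle completion with the published checkpoint.
# Assumes the tokenizer still carries deepseek-coder's FIM tokens
# (<|fim▁begin|> / <|fim▁hole|> / <|fim▁end|>) noted in the config above.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "CodeGPTPlus/deepseek_coder_1.3b_typescript"  # hub_model_id from the config
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_id, trust_remote_code=True)

# Prefix and suffix surround the hole the model should fill in.
prompt = (
    "<|fim▁begin|>function quickSort(arr: number[]): number[] {\n"
    "  if (arr.length <= 1) return arr;\n"
    "  const pivot = arr[0];\n"
    "<|fim▁hole|>\n"
    "  return [...quickSort(left), pivot, ...quickSort(right)];\n"
    "}<|fim▁end|>"
)

inputs = tokenizer(prompt, return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=128, do_sample=False)
# Decode only the newly generated middle section, dropping the prompt tokens.
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))
```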
generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "_from_model_config": true,
+   "bos_token_id": 32013,
+   "eos_token_id": 32014,
+   "transformers_version": "4.37.0.dev0"
+ }
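As a quick, hedged sanity check, the ids in this generation config should correspond to the begin/end-of-sentence tokens set in the training config; that 32013 and 32014 map to `<|begin▁of▁sentence|>` and `<|end▁of▁sentence|>` in the deepseek-coder tokenizer is an assumption the snippet below verifies rather than asserts.

```python
# Hedged sketch: confirm the saved generation defaults line up with the
# tokenizer's special tokens; the 32013/32014 mapping is checked, not assumed.
from transformers import AutoTokenizer, GenerationConfig

model_id = "CodeGPTPlus/deepseek_coder_1.3b_typescript"
gen_config = GenerationConfig.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

print(gen_config.bos_token_id, tokenizer.convert_ids_to_tokens(gen_config.bos_token_id))
print(gen_config.eos_token_id, tokenizer.convert_ids_to_tokens(gen_config.eos_token_id))
```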
model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cd118bbd74402fe81992f4f5cd153d946950c59f3a0f3d2f0984d26b61c665af
+ oid sha256:fee5059bda6faea9f81360b5b876ec081744de1940b536f62c1b6945a9876d2c
  size 3234034400