deuswoof committed on
Commit
de12c8c
1 Parent(s): ea6aecf

Training in progress, step 10

Browse files
README.md CHANGED
@@ -40,6 +40,18 @@ The following `bitsandbytes` quantization config was used during training:
40
  - bnb_4bit_use_double_quant: True
41
  - bnb_4bit_compute_dtype: bfloat16
42
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  The following `bitsandbytes` quantization config was used during training:
44
  - quant_method: bitsandbytes
45
  - load_in_8bit: False
@@ -56,5 +68,6 @@ The following `bitsandbytes` quantization config was used during training:
56
  - PEFT 0.5.0
57
  - PEFT 0.5.0
58
  - PEFT 0.5.0
 
59
 
60
  - PEFT 0.5.0
 
40
  - bnb_4bit_use_double_quant: True
41
  - bnb_4bit_compute_dtype: bfloat16
42
 
43
+ The following `bitsandbytes` quantization config was used during training:
44
+ - quant_method: bitsandbytes
45
+ - load_in_8bit: False
46
+ - load_in_4bit: True
47
+ - llm_int8_threshold: 6.0
48
+ - llm_int8_skip_modules: None
49
+ - llm_int8_enable_fp32_cpu_offload: False
50
+ - llm_int8_has_fp16_weight: False
51
+ - bnb_4bit_quant_type: nf4
52
+ - bnb_4bit_use_double_quant: True
53
+ - bnb_4bit_compute_dtype: bfloat16
54
+
55
  The following `bitsandbytes` quantization config was used during training:
56
  - quant_method: bitsandbytes
57
  - load_in_8bit: False
 
68
  - PEFT 0.5.0
69
  - PEFT 0.5.0
70
  - PEFT 0.5.0
71
+ - PEFT 0.5.0
72
 
73
  - PEFT 0.5.0
adapter_config.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "auto_mapping": null,
3
- "base_model_name_or_path": "tiiuae/falcon-7b",
4
  "bias": "none",
5
  "fan_in_fan_out": false,
6
  "inference_mode": true,
 
1
  {
2
  "auto_mapping": null,
3
+ "base_model_name_or_path": "tiiuae/falcon-rw-1b",
4
  "bias": "none",
5
  "fan_in_fan_out": false,
6
  "inference_mode": true,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:803ab8011b150d790ddb415469edc8be316d3ce8e7ad91e59c32d9798ee958cb
3
- size 261189453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3092124bd2e862488f1cdca59c3894c7cd49f81d0f2467e0291dcc3c57dc5e5
3
+ size 100733709
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9ad3f6d49c7f11f95815510e85c6f8720bb38f23a3c448313f90939db5afad0
3
- size 100690288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ceda947757e8e863957b7c0ac6282dda8be63d6aff7d21ca866b79d3024273c
3
+ size 261131840
config.json CHANGED
@@ -1,33 +1,33 @@
1
  {
2
- "_name_or_path": "tiiuae/falcon-rw-1b",
3
- "alibi": true,
4
  "apply_residual_connection_post_layernorm": false,
5
  "architectures": [
6
  "FalconForCausalLM"
7
  ],
8
  "attention_dropout": 0.0,
9
  "auto_map": {
10
- "AutoConfig": "tiiuae/falcon-rw-1b--configuration_falcon.FalconConfig",
11
- "AutoModel": "tiiuae/falcon-rw-1b--modeling_falcon.FalconModel",
12
- "AutoModelForCausalLM": "tiiuae/falcon-rw-1b--modeling_falcon.FalconForCausalLM",
13
- "AutoModelForQuestionAnswering": "tiiuae/falcon-rw-1b--modeling_falcon.FalconForQuestionAnswering",
14
- "AutoModelForSequenceClassification": "tiiuae/falcon-rw-1b--modeling_falcon.FalconForSequenceClassification",
15
- "AutoModelForTokenClassification": "tiiuae/falcon-rw-1b--modeling_falcon.FalconForTokenClassification"
16
  },
17
- "bias": true,
18
- "bos_token_id": 1,
19
- "eos_token_id": 2,
20
  "hidden_dropout": 0.0,
21
- "hidden_size": 2048,
22
  "initializer_range": 0.02,
23
  "layer_norm_epsilon": 1e-05,
24
  "model_type": "falcon",
25
- "multi_query": false,
26
  "new_decoder_architecture": false,
27
- "num_attention_heads": 32,
28
- "num_hidden_layers": 24,
29
- "num_kv_heads": 32,
30
- "parallel_attn": false,
31
  "quantization_config": {
32
  "bnb_4bit_compute_dtype": "bfloat16",
33
  "bnb_4bit_quant_type": "nf4",
@@ -43,5 +43,5 @@
43
  "torch_dtype": "bfloat16",
44
  "transformers_version": "4.34.1",
45
  "use_cache": false,
46
- "vocab_size": 50304
47
  }
 
1
  {
2
+ "_name_or_path": "tiiuae/falcon-7b",
3
+ "alibi": false,
4
  "apply_residual_connection_post_layernorm": false,
5
  "architectures": [
6
  "FalconForCausalLM"
7
  ],
8
  "attention_dropout": 0.0,
9
  "auto_map": {
10
+ "AutoConfig": "tiiuae/falcon-7b--configuration_falcon.FalconConfig",
11
+ "AutoModel": "tiiuae/falcon-7b--modeling_falcon.FalconModel",
12
+ "AutoModelForCausalLM": "tiiuae/falcon-7b--modeling_falcon.FalconForCausalLM",
13
+ "AutoModelForQuestionAnswering": "tiiuae/falcon-7b--modeling_falcon.FalconForQuestionAnswering",
14
+ "AutoModelForSequenceClassification": "tiiuae/falcon-7b--modeling_falcon.FalconForSequenceClassification",
15
+ "AutoModelForTokenClassification": "tiiuae/falcon-7b--modeling_falcon.FalconForTokenClassification"
16
  },
17
+ "bias": false,
18
+ "bos_token_id": 11,
19
+ "eos_token_id": 11,
20
  "hidden_dropout": 0.0,
21
+ "hidden_size": 4544,
22
  "initializer_range": 0.02,
23
  "layer_norm_epsilon": 1e-05,
24
  "model_type": "falcon",
25
+ "multi_query": true,
26
  "new_decoder_architecture": false,
27
+ "num_attention_heads": 71,
28
+ "num_hidden_layers": 32,
29
+ "num_kv_heads": 71,
30
+ "parallel_attn": true,
31
  "quantization_config": {
32
  "bnb_4bit_compute_dtype": "bfloat16",
33
  "bnb_4bit_quant_type": "nf4",
 
43
  "torch_dtype": "bfloat16",
44
  "transformers_version": "4.34.1",
45
  "use_cache": false,
46
+ "vocab_size": 65024
47
  }
special_tokens_map.json CHANGED
@@ -1,17 +1,6 @@
1
  {
2
- "additional_special_tokens": [
3
- ">>TITLE<<",
4
- ">>ABSTRACT<<",
5
- ">>INTRODUCTION<<",
6
- ">>SUMMARY<<",
7
- ">>COMMENT<<",
8
- ">>ANSWER<<",
9
- ">>QUESTION<<",
10
- ">>DOMAIN<<",
11
- ">>PREFIX<<",
12
- ">>SUFFIX<<",
13
- ">>MIDDLE<<"
14
- ],
15
  "eos_token": "<|endoftext|>",
16
- "pad_token": "<|endoftext|>"
 
17
  }
 
1
  {
2
+ "bos_token": "<|endoftext|>",
 
 
 
 
 
 
 
 
 
 
 
 
3
  "eos_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>",
5
+ "unk_token": "<|endoftext|>"
6
  }
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,95 +1,7 @@
1
  {
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
- "0": {
5
- "content": ">>TITLE<<",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "1": {
13
- "content": ">>ABSTRACT<<",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "2": {
21
- "content": ">>INTRODUCTION<<",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "3": {
29
- "content": ">>SUMMARY<<",
30
- "lstrip": false,
31
- "normalized": false,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
- },
36
- "4": {
37
- "content": ">>COMMENT<<",
38
- "lstrip": false,
39
- "normalized": false,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": true
43
- },
44
- "5": {
45
- "content": ">>ANSWER<<",
46
- "lstrip": false,
47
- "normalized": false,
48
- "rstrip": false,
49
- "single_word": false,
50
- "special": true
51
- },
52
- "6": {
53
- "content": ">>QUESTION<<",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false,
58
- "special": true
59
- },
60
- "7": {
61
- "content": ">>DOMAIN<<",
62
- "lstrip": false,
63
- "normalized": false,
64
- "rstrip": false,
65
- "single_word": false,
66
- "special": true
67
- },
68
- "8": {
69
- "content": ">>PREFIX<<",
70
- "lstrip": false,
71
- "normalized": false,
72
- "rstrip": false,
73
- "single_word": false,
74
- "special": true
75
- },
76
- "9": {
77
- "content": ">>SUFFIX<<",
78
- "lstrip": false,
79
- "normalized": false,
80
- "rstrip": false,
81
- "single_word": false,
82
- "special": true
83
- },
84
- "10": {
85
- "content": ">>MIDDLE<<",
86
- "lstrip": false,
87
- "normalized": false,
88
- "rstrip": false,
89
- "single_word": false,
90
- "special": true
91
- },
92
- "11": {
93
  "content": "<|endoftext|>",
94
  "lstrip": false,
95
  "normalized": false,
@@ -98,26 +10,11 @@
98
  "special": true
99
  }
100
  },
101
- "additional_special_tokens": [
102
- ">>TITLE<<",
103
- ">>ABSTRACT<<",
104
- ">>INTRODUCTION<<",
105
- ">>SUMMARY<<",
106
- ">>COMMENT<<",
107
- ">>ANSWER<<",
108
- ">>QUESTION<<",
109
- ">>DOMAIN<<",
110
- ">>PREFIX<<",
111
- ">>SUFFIX<<",
112
- ">>MIDDLE<<"
113
- ],
114
  "clean_up_tokenization_spaces": true,
115
  "eos_token": "<|endoftext|>",
116
- "model_input_names": [
117
- "input_ids",
118
- "attention_mask"
119
- ],
120
- "model_max_length": 2048,
121
  "pad_token": "<|endoftext|>",
122
- "tokenizer_class": "PreTrainedTokenizerFast"
 
123
  }
 
1
  {
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
+ "50256": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "content": "<|endoftext|>",
6
  "lstrip": false,
7
  "normalized": false,
 
10
  "special": true
11
  }
12
  },
13
+ "bos_token": "<|endoftext|>",
 
 
 
 
 
 
 
 
 
 
 
 
14
  "clean_up_tokenization_spaces": true,
15
  "eos_token": "<|endoftext|>",
16
+ "model_max_length": 1024,
 
 
 
 
17
  "pad_token": "<|endoftext|>",
18
+ "tokenizer_class": "GPT2Tokenizer",
19
+ "unk_token": "<|endoftext|>"
20
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93e264e6d806d2410820009c1ca281477c923bfd5f3b234f0fd19dc5a10ed022
3
  size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46dfa040a93c7f61a97e86a998ade17bf1b307d0aa4af935c733a7c7ad2cd6b3
3
  size 4091