lockon-n committed on
Commit 8cf4e9b
1 Parent(s): 8f0a10a

First upload

README.md CHANGED
@@ -4,8 +4,15 @@ language:
  ---
  This is a GPTQ 4-bit version of Auto-J-13B. We converted it using [this script (by TheBloke)](https://gist.github.com/TheBloke/b47c50a70dd4fe653f64a12928286682#file-quant_autogptq-py).

- It takes about 8GB VRAM to load this model, and we provide an example for using it in `example.py`.
+ To use the 4-bit version of Auto-J, you need to install the following packages:
+ ```bash
+ pip install safetensors
+ pip install transformers>=4.32.0 optimum>=1.12.0
+ pip install auto-gptq --extra-index-url https://huggingface.github.io/autogptq-index/whl/cu118/  # Use cu117 if on CUDA 11.7
+ ```
+
+ It takes about 8GB of VRAM to load this model, and we provide a usage example in `example_gptq4bits.py`.

  Note that the behaviour of the quantized model may differ from that of the original model.

- Please refer to our [github repo](https://github.com/GAIR-NLP/auto-j) for more datails.
+ Please refer to our [GitHub repo](https://github.com/GAIR-NLP/auto-j) for more details.
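As a quick sanity check on the ~8GB VRAM figure above, the following minimal sketch (an editorial addition, not part of this commit) loads the quantized checkpoint and prints its memory footprint; it assumes the packages from the README are installed and a CUDA GPU is available:

```python
# Editorial sketch: load the GPTQ checkpoint and report its memory use.
# Assumes the install steps above have been run and a CUDA device is present.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

path = "GAIR/autoj-13b-GPTQ-4bits"  # repo id used in example_gptq4bits.py below
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForCausalLM.from_pretrained(path, device_map="auto")

# get_memory_footprint() sums the size of the model's parameters and buffers.
print(f"model footprint: {model.get_memory_footprint() / 1024**3:.2f} GiB")
print(f"CUDA allocated:  {torch.cuda.memory_allocated() / 1024**3:.2f} GiB")
```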
added_tokens.json ADDED
@@ -0,0 +1,130 @@
+ {
+   "<extra_id_32001>": 32001,
+   "<extra_id_32002>": 32002,
+   "<extra_id_32003>": 32003,
+   "<extra_id_32004>": 32004,
+   "<extra_id_32005>": 32005,
+   "<extra_id_32006>": 32006,
+   "<extra_id_32007>": 32007,
+   "<extra_id_32008>": 32008,
+   "<extra_id_32009>": 32009,
+   "<extra_id_32010>": 32010,
+   "<extra_id_32011>": 32011,
+   "<extra_id_32012>": 32012,
+   "<extra_id_32013>": 32013,
+   "<extra_id_32014>": 32014,
+   "<extra_id_32015>": 32015,
+   "<extra_id_32016>": 32016,
+   "<extra_id_32017>": 32017,
+   "<extra_id_32018>": 32018,
+   "<extra_id_32019>": 32019,
+   "<extra_id_32020>": 32020,
+   "<extra_id_32021>": 32021,
+   "<extra_id_32022>": 32022,
+   "<extra_id_32023>": 32023,
+   "<extra_id_32024>": 32024,
+   "<extra_id_32025>": 32025,
+   "<extra_id_32026>": 32026,
+   "<extra_id_32027>": 32027,
+   "<extra_id_32028>": 32028,
+   "<extra_id_32029>": 32029,
+   "<extra_id_32030>": 32030,
+   "<extra_id_32031>": 32031,
+   "<extra_id_32032>": 32032,
+   "<extra_id_32033>": 32033,
+   "<extra_id_32034>": 32034,
+   "<extra_id_32035>": 32035,
+   "<extra_id_32036>": 32036,
+   "<extra_id_32037>": 32037,
+   "<extra_id_32038>": 32038,
+   "<extra_id_32039>": 32039,
+   "<extra_id_32040>": 32040,
+   "<extra_id_32041>": 32041,
+   "<extra_id_32042>": 32042,
+   "<extra_id_32043>": 32043,
+   "<extra_id_32044>": 32044,
+   "<extra_id_32045>": 32045,
+   "<extra_id_32046>": 32046,
+   "<extra_id_32047>": 32047,
+   "<extra_id_32048>": 32048,
+   "<extra_id_32049>": 32049,
+   "<extra_id_32050>": 32050,
+   "<extra_id_32051>": 32051,
+   "<extra_id_32052>": 32052,
+   "<extra_id_32053>": 32053,
+   "<extra_id_32054>": 32054,
+   "<extra_id_32055>": 32055,
+   "<extra_id_32056>": 32056,
+   "<extra_id_32057>": 32057,
+   "<extra_id_32058>": 32058,
+   "<extra_id_32059>": 32059,
+   "<extra_id_32060>": 32060,
+   "<extra_id_32061>": 32061,
+   "<extra_id_32062>": 32062,
+   "<extra_id_32063>": 32063,
+   "<extra_id_32064>": 32064,
+   "<extra_id_32065>": 32065,
+   "<extra_id_32066>": 32066,
+   "<extra_id_32067>": 32067,
+   "<extra_id_32068>": 32068,
+   "<extra_id_32069>": 32069,
+   "<extra_id_32070>": 32070,
+   "<extra_id_32071>": 32071,
+   "<extra_id_32072>": 32072,
+   "<extra_id_32073>": 32073,
+   "<extra_id_32074>": 32074,
+   "<extra_id_32075>": 32075,
+   "<extra_id_32076>": 32076,
+   "<extra_id_32077>": 32077,
+   "<extra_id_32078>": 32078,
+   "<extra_id_32079>": 32079,
+   "<extra_id_32080>": 32080,
+   "<extra_id_32081>": 32081,
+   "<extra_id_32082>": 32082,
+   "<extra_id_32083>": 32083,
+   "<extra_id_32084>": 32084,
+   "<extra_id_32085>": 32085,
+   "<extra_id_32086>": 32086,
+   "<extra_id_32087>": 32087,
+   "<extra_id_32088>": 32088,
+   "<extra_id_32089>": 32089,
+   "<extra_id_32090>": 32090,
+   "<extra_id_32091>": 32091,
+   "<extra_id_32092>": 32092,
+   "<extra_id_32093>": 32093,
+   "<extra_id_32094>": 32094,
+   "<extra_id_32095>": 32095,
+   "<extra_id_32096>": 32096,
+   "<extra_id_32097>": 32097,
+   "<extra_id_32098>": 32098,
+   "<extra_id_32099>": 32099,
+   "<extra_id_32100>": 32100,
+   "<extra_id_32101>": 32101,
+   "<extra_id_32102>": 32102,
+   "<extra_id_32103>": 32103,
+   "<extra_id_32104>": 32104,
+   "<extra_id_32105>": 32105,
+   "<extra_id_32106>": 32106,
+   "<extra_id_32107>": 32107,
+   "<extra_id_32108>": 32108,
+   "<extra_id_32109>": 32109,
+   "<extra_id_32110>": 32110,
+   "<extra_id_32111>": 32111,
+   "<extra_id_32112>": 32112,
+   "<extra_id_32113>": 32113,
+   "<extra_id_32114>": 32114,
+   "<extra_id_32115>": 32115,
+   "<extra_id_32116>": 32116,
+   "<extra_id_32117>": 32117,
+   "<extra_id_32118>": 32118,
+   "<extra_id_32119>": 32119,
+   "<extra_id_32120>": 32120,
+   "<extra_id_32121>": 32121,
+   "<extra_id_32122>": 32122,
+   "<extra_id_32123>": 32123,
+   "<extra_id_32124>": 32124,
+   "<extra_id_32125>": 32125,
+   "<extra_id_32126>": 32126,
+   "<extra_id_32127>": 32127,
+   "<pad>": 32000
+ }
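The 128 tokens above (`<extra_id_32001>`…`<extra_id_32127>` plus `<pad>` at id 32000) extend the base LLaMA vocabulary of 32,000 entries to the 32,128 declared in `config.json`. A small consistency check (editorial sketch, not part of the repo):

```python
# Editorial sketch: verify that the tokenizer's vocabulary size matches the
# model config and that the added <pad> token sits at id 32000.
from transformers import AutoConfig, AutoTokenizer

path = "GAIR/autoj-13b-GPTQ-4bits"
tokenizer = AutoTokenizer.from_pretrained(path)
config = AutoConfig.from_pretrained(path)

print(len(tokenizer), config.vocab_size)         # both expected to be 32128
print(tokenizer.convert_tokens_to_ids("<pad>"))  # expected 32000
```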
config.json ADDED
@@ -0,0 +1,40 @@
+ {
+   "_name_or_path": "/cpfs01/shared/GAIR/GAIR_hdd/jlli/llama-2-sft/13b/grm/autoj-13b",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "attention_bias": false,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 5120,
+   "initializer_range": 0.02,
+   "intermediate_size": 13824,
+   "max_position_embeddings": 8192,
+   "model_type": "llama",
+   "num_attention_heads": 40,
+   "num_hidden_layers": 40,
+   "num_key_value_heads": 40,
+   "pad_token_id": 0,
+   "pretraining_tp": 1,
+   "rms_norm_eps": 1e-05,
+   "rope_scaling": null,
+   "rope_theta": 10000.0,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.34.0",
+   "use_cache": true,
+   "vocab_size": 32128,
+   "quantization_config": {
+     "bits": 4,
+     "group_size": 128,
+     "damp_percent": 0.1,
+     "desc_act": true,
+     "static_groups": false,
+     "sym": true,
+     "true_sequential": true,
+     "model_name_or_path": null,
+     "model_file_base_name": null,
+     "quant_method": "gptq"
+   }
+ }
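Because the GPTQ settings are embedded under `quantization_config`, a plain `AutoModelForCausalLM.from_pretrained` call routes loading through optimum/auto-gptq. The sketch below is an editorial addition; the `disable_exllama` override is only an example of a setting one might change, not something this repo requires:

```python
# Editorial sketch: read the embedded GPTQ settings, or override them with a
# GPTQConfig (available in transformers >= 4.32) when loading the model.
from transformers import AutoConfig, AutoModelForCausalLM, GPTQConfig

path = "GAIR/autoj-13b-GPTQ-4bits"

config = AutoConfig.from_pretrained(path)
print(config.quantization_config)  # {'bits': 4, 'group_size': 128, 'desc_act': True, ...}

# Example override: fall back to the non-exllama CUDA kernels.
gptq_config = GPTQConfig(bits=4, disable_exllama=True)
model = AutoModelForCausalLM.from_pretrained(
    path, device_map="auto", quantization_config=gptq_config
)
```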
constants_prompt.py ADDED
@@ -0,0 +1,56 @@
+ PROMPT_INPUT_SYSTEM: str = '[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n{input} [/INST]'
+
+ PROMPT_INPUT_WO_SYSTEM: str = "[INST] {input} [/INST]"
+
+ PROMPT_INPUT_FOR_SCENARIO_CLS: str = "Identify the scenario for the user's query, output 'default' if you are uncertain.\nQuery:\n{input}\nScenario:\n"
+
+ single = """Write critiques for a submitted response on a given user's query, and grade the response:
+
+ [BEGIN DATA]
+ ***
+ [Query]: {prompt}
+ ***
+ [Response]: {response}
+ ***
+ [END DATA]
+
+ Write critiques for this response. After that, you should give a final rating for the response on a scale of 1 to 10 by strictly following this format: "[[rating]]", for example: "Rating: [[5]]"."""
+
+ pairwise_tie = """You are assessing two submitted responses on a given user's query and judging which response is better or they are tied. Here is the data:
+
+ [BEGIN DATA]
+ ***
+ [Query]: {prompt}
+ ***
+ [Response 1]: {response}
+ ***
+ [Response 2]: {response_another}
+ ***
+ [END DATA]
+
+ Here are the instructions to assess and compare the two responses:
+
+ 1. Pinpoint the key factors to distinguish these two responses.
+ 2. Conclude your comparison by providing a final decision on which response is better, or they are tied. Begin your final decision statement with "So, the final decision is Response 1 / Response 2 / Tie". Ensure that your decision aligns coherently with the comprehensive evaluation and comparison you've provided."""
+
+ protocol_mapping = {
+     "pairwise_tie": pairwise_tie,
+     "single": single,
+ }
+
+
+ def llama2_wrapper(usr_msg, sys_msg=None):
+     if sys_msg is None:
+         return PROMPT_INPUT_WO_SYSTEM.format(input=usr_msg)
+     else:
+         return PROMPT_INPUT_SYSTEM.format(input=usr_msg, system_message=sys_msg)
+
+
+ def build_autoj_input(prompt, resp1, resp2=None, protocol="single"):
+     user_msg = protocol_mapping[protocol].format(prompt=prompt, response=resp1, response_another=resp2)
+     return llama2_wrapper(user_msg)
+
+
+ if __name__ == '__main__':
+     t = build_autoj_input("instruction", "resp1", "resp2", "pairwise_tie")
+     print(t)
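The `single` protocol above instructs the model to end its critique with `Rating: [[k]]`. A small helper for pulling that rating back out of the generated text might look like this (editorial sketch; `extract_single_rating` is not part of this repo):

```python
# Editorial sketch: parse the final 'Rating: [[k]]' from a single-protocol critique.
import re

def extract_single_rating(critique: str):
    """Return the integer in the last 'Rating: [[k]]' pattern, or None if absent."""
    matches = re.findall(r"Rating:\s*\[\[(\d+)\]\]", critique)
    return int(matches[-1]) if matches else None

print(extract_single_rating("The answer is helpful but verbose. Rating: [[6]]"))  # 6
```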
example_gptq4bits.py ADDED
@@ -0,0 +1,22 @@
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from constants_prompt import *
+
+ path = "GAIR/autoj-13b-GPTQ-4bits"
+
+ tokenizer = AutoTokenizer.from_pretrained(path)
+
+ model = AutoModelForCausalLM.from_pretrained(path, device_map="auto")
+
+ query = "<your query>"
+ response = "<a response>"
+ text = build_autoj_input(query, response)
+
+ # or, for pairwise comparison:
+ # response_another = "<another response>"
+ # text = build_autoj_input(query, response, response_another, "pairwise_tie")
+
+ inputs = tokenizer(text, return_tensors="pt").to("cuda")
+
+ out = model.generate(**inputs, max_length=1000, temperature=0.0, do_sample=False, top_p=1.0)
+ print(tokenizer.decode(out[0], skip_special_tokens=True))
+ # note that this output still contains the input prompt; you may need to strip it yourself
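As the last comment notes, the decoded text still echoes the prompt. One way to drop it (editorial sketch, continuing directly from the `tokenizer`, `inputs`, and `out` variables defined in `example_gptq4bits.py`) is to decode only the tokens generated after the input:

```python
# Editorial sketch: decode only the newly generated tokens so the echoed
# prompt built by build_autoj_input() is not printed again.
prompt_len = inputs["input_ids"].shape[1]
evaluation = tokenizer.decode(out[0][prompt_len:], skip_special_tokens=True)
print(evaluation)
```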
model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c5763402bd2f563c41445fadb7f825c59f32282ab5d0c84ddc4acc127701fbc9
+ size 7920867784
quantize_config.json ADDED
@@ -0,0 +1,11 @@
+ {
+   "bits": 4,
+   "group_size": 128,
+   "damp_percent": 0.1,
+   "desc_act": true,
+   "static_groups": false,
+   "sym": true,
+   "true_sequential": true,
+   "model_name_or_path": null,
+   "model_file_base_name": null
+ }
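These are the standard AutoGPTQ quantization parameters; in the linked TheBloke script they are passed as a `BaseQuantizeConfig` before calibration. A hedged sketch of how this config maps onto the AutoGPTQ API follows; the source-model path `GAIR/autoj-13b` and the calibration data are placeholders, not the actual inputs used to produce this repo:

```python
# Editorial sketch: how quantize_config.json corresponds to AutoGPTQ's API.
# "GAIR/autoj-13b" and the calibration examples are placeholders/assumptions.
from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig

quantize_config = BaseQuantizeConfig(
    bits=4,
    group_size=128,
    damp_percent=0.1,
    desc_act=True,
    static_groups=False,
    sym=True,
    true_sequential=True,
)

model = AutoGPTQForCausalLM.from_pretrained("GAIR/autoj-13b", quantize_config)
# examples = [tokenizer(text, return_tensors="pt") for text in calibration_texts]
# model.quantize(examples)
# model.save_quantized("autoj-13b-GPTQ-4bits", use_safetensors=True)
```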
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,33 @@
+ {
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "clean_up_tokenization_spaces": false,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "legacy": false,
+   "model_max_length": 4096,
+   "pad_token": null,
+   "padding_side": "right",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }