Squish42 committed
Commit 06421b7
0 Parent(s)

Initial commit

Files changed (8):
  1. .gitattributes +35 -0
  2. .gitignore +1 -0
  3. README.md +27 -0
  4. config.json +23 -0
  5. quantize.py +22 -0
  6. quantize_config.json +10 -0
  7. tokenizer.json +0 -0
  8. tokenizer.model +3 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+ models
README.md ADDED
@@ -0,0 +1,27 @@
+ ---
+ license: unknown
+ ---
+
+ [ehartford/WizardLM-7B-Uncensored](https://huggingface.co/ehartford/WizardLM-7B-Uncensored) quantized to **8-bit GPTQ** with act-order and true-sequential, no group size.
+
+ *For most uses, this probably isn't what you want.*\
+ *For 4-bit with no act-order, or for compatibility with `old-cuda` (the text-generation-webui default), see [TheBloke/WizardLM-7B-uncensored-GPTQ](https://huggingface.co/TheBloke/WizardLM-7B-uncensored-GPTQ).*
+
+ Quantized using AutoGPTQ with the following config:
+ ```python
+ config: dict = dict(
+     quantize_config=dict(bits=8, desc_act=True, true_sequential=True, model_file_base_name='WizardLM-7B-Uncensored'),
+     use_safetensors=True
+ )
+ ```
+ See `quantize.py` for the full script.
+
+ Tested for compatibility with:
+ - WSL with GPTQ-for-Llama (`triton` branch)
+ - Windows with AutoGPTQ on `cuda` (triton deselected)
+
+ The AutoGPTQ loader should read its configuration from `quantize_config.json`.\
+ For GPTQ-for-Llama, use the following settings when loading:\
+ wbits: 8\
+ groupsize: None\
+ model_type: llama
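
The README above says the AutoGPTQ loader picks its settings up from `quantize_config.json`. As a minimal sketch of what loading this repo with AutoGPTQ might look like (the local directory path, device, and prompt are assumptions, not part of this commit):

```python
# Sketch: load the committed 8-bit GPTQ weights with AutoGPTQ.
# from_quantized() reads quantize_config.json from the model directory.
from transformers import AutoTokenizer
from auto_gptq import AutoGPTQForCausalLM

model_dir = "."  # assumed: a local clone of this repo

tokenizer = AutoTokenizer.from_pretrained(model_dir, use_fast=True)
model = AutoGPTQForCausalLM.from_quantized(model_dir, device="cuda:0", use_safetensors=True)

# Quick smoke test of the quantized weights.
inputs = tokenizer("It was a cold night", return_tensors="pt").to("cuda:0")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=32)[0]))
```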
config.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "_name_or_path": "models/WizardLM-7B-Uncensored",
+   "architectures": [
+     "LlamaForCausalLM"
+   ],
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 11008,
+   "max_position_embeddings": 2048,
+   "model_type": "llama",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "pad_token_id": 0,
+   "rms_norm_eps": 1e-06,
+   "tie_word_embeddings": false,
+   "torch_dtype": "float16",
+   "transformers_version": "4.30.2",
+   "use_cache": true,
+   "vocab_size": 32001
+ }
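
This is a standard LLaMA-7B `transformers` config (32 layers, hidden size 4096); `vocab_size` is 32001, presumably because a pad token was added during fine-tuning. A small sketch, assuming a local clone, of inspecting it with `AutoConfig`:

```python
# Sketch: read the committed config.json with transformers (local path is an assumption).
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained(".")
print(cfg.model_type, cfg.num_hidden_layers, cfg.hidden_size, cfg.vocab_size)
# expected from config.json: llama 32 4096 32001
```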
quantize.py ADDED
@@ -0,0 +1,22 @@
+ from transformers import AutoTokenizer, TextGenerationPipeline
+ from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
+ import logging
+
+ pretrained_model_dir: str = "models/WizardLM-7B-Uncensored"
+ quantized_model_dir: str = "./"
+ config: dict = dict(
+     quantize_config=dict(bits=8, desc_act=True, true_sequential=True, model_file_base_name='WizardLM-7B-Uncensored'),
+     use_safetensors=True
+ )
+
+ logging.basicConfig(
+     format="%(asctime)s %(levelname)s [%(name)s] %(message)s", level=logging.INFO, datefmt="%Y-%m-%d %H:%M:%S"
+ )
+
+ tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
+ examples: list[dict[str, list[int]]] = [tokenizer("It was a cold night")]
+
+ model = AutoGPTQForCausalLM.from_pretrained(pretrained_model_dir, BaseQuantizeConfig(**config['quantize_config']))
+ model.quantize(examples)
+
+ model.save_quantized(quantized_model_dir, use_safetensors=config['use_safetensors'])
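
`TextGenerationPipeline` is imported in the script but never used. A hedged sketch of how the freshly quantized, in-memory model could be smoke-tested in the same process (the prompt and generation length are assumptions, not part of the commit):

```python
# Sketch: smoke-test the quantized model right after model.quantize(examples)
# using the TextGenerationPipeline already imported in quantize.py.
pipeline = TextGenerationPipeline(model=model, tokenizer=tokenizer)
print(pipeline("It was a cold night", max_new_tokens=32)[0]["generated_text"])
```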
quantize_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "bits": 8,
+   "group_size": -1,
+   "damp_percent": 0.01,
+   "desc_act": true,
+   "sym": true,
+   "true_sequential": true,
+   "model_name_or_path": null,
+   "model_file_base_name": "WizardLM-7B-Uncensored"
+ }
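
For reference, this JSON mirrors the `BaseQuantizeConfig` built in `quantize.py`; a hedged sketch of the equivalent constructor call (the fields the script did not set, such as `group_size`, `damp_percent`, and `sym`, appear to be AutoGPTQ defaults):

```python
# Sketch: the committed quantize_config.json expressed as a BaseQuantizeConfig.
from auto_gptq import BaseQuantizeConfig

quantize_config = BaseQuantizeConfig(
    bits=8,
    group_size=-1,            # -1 = no group size
    damp_percent=0.01,
    desc_act=True,            # act-order
    sym=True,
    true_sequential=True,
    model_file_base_name="WizardLM-7B-Uncensored",
)
```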
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
+ size 499723