Isotr0py committed on
Commit
1215123
1 Parent(s): 520cbfb

Upload create_sample.py

Browse files
Files changed (1) hide show
  1. create_sample.py +157 -0
create_sample.py ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ from math import prod
5
+ from pathlib import Path
6
+ import ctypes
7
+ import logging
8
+ import numpy as np
9
+
10
+
11
+ import gguf
12
+ from gguf.constants import GGMLQuantizationType
13
+
14
+
15
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# ctypes pointer type for C `float *` arguments.
c_float_p = ctypes.POINTER(ctypes.c_float)
19
+
20
+
21
class ggml_init_params(ctypes.Structure):
    """ctypes structure passed by value to ``ggml_init``.

    NOTE(review): the field order and types presumably mirror the C struct
    of the same name in ggml.h -- confirm against the header in use.
    """

    _fields_ = [
        # size_t mem_size
        ("mem_size", ctypes.c_size_t),
        # void * mem_buffer
        ("mem_buffer", ctypes.c_void_p),
        # bool no_alloc
        ("no_alloc", ctypes.c_bool),
    ]
27
+
28
+
29
class GGMLQuants:
    """Thin ctypes wrapper around the (de)quantization routines of libggml.

    The constructor loads the shared library, declares the C prototypes of
    every function used by this class and calls ``ggml_init`` once.
    """

    libggml: ctypes.CDLL

    # Suffixes of the ``dequantize_row_<type>`` symbols whose prototypes are
    # declared in ``__init__``.
    _DEQUANT_ROW_TYPES = (
        "q4_0", "q4_1", "q5_0", "q5_1", "q8_0",
        "q2_K", "q3_K", "q4_K", "q5_K", "q6_K",
        "tq1_0", "tq2_0",
        "iq2_xxs", "iq2_xs", "iq2_s", "iq3_xxs", "iq3_s", "iq1_s", "iq1_m",
        "iq4_nl", "iq4_xs",
    )

    def __init__(self, libggml: Path):
        """Load the shared library and declare the prototypes used below.

        Args:
            libggml: Path to the ggml shared library.

        Raises:
            OSError: If the library cannot be loaded.
            AttributeError: If an expected symbol is missing from the library.
        """
        self.libggml = ctypes.CDLL(str(libggml), winmode=0)
        # NOTE: the original author left an alternative loader here:
        #   ctypes.WinDLL(str(libggml), winmode=0)

        self.libggml.ggml_quantize_chunk.restype = ctypes.c_size_t
        # size_t ggml_quantize_chunk(
        #     enum ggml_type type,
        #     const float * src,
        #     void * dst,
        #     int64_t start,
        #     int64_t nrows,
        #     int64_t n_per_row,
        #     const float * imatrix)
        self.libggml.ggml_quantize_chunk.argtypes = (
            ctypes.c_int,
            ctypes.POINTER(ctypes.c_float),
            ctypes.c_void_p,
            ctypes.c_int64,
            ctypes.c_int64,
            ctypes.c_int64,
            ctypes.POINTER(ctypes.c_float),
        )

        self.libggml.ggml_quantize_requires_imatrix.restype = ctypes.c_bool
        self.libggml.ggml_quantize_requires_imatrix.argtypes = (ctypes.c_int,)

        # Every dequantize_row_* function is declared as (src, dst, n) -> void.
        for type_suffix in self._DEQUANT_ROW_TYPES:
            dequant_func = getattr(self.libggml, "dequantize_row_" + type_suffix)
            dequant_func.restype = None
            dequant_func.argtypes = (ctypes.c_void_p, ctypes.POINTER(ctypes.c_float), ctypes.c_int64)

        self.libggml.ggml_fp16_to_fp32_row.restype = None
        self.libggml.ggml_fp16_to_fp32_row.argtypes = (ctypes.POINTER(ctypes.c_uint16), ctypes.POINTER(ctypes.c_float), ctypes.c_int64)
        self.libggml.ggml_bf16_to_fp32_row.restype = None
        self.libggml.ggml_bf16_to_fp32_row.argtypes = (ctypes.POINTER(ctypes.c_uint16), ctypes.POINTER(ctypes.c_float), ctypes.c_int64)

        self.libggml.ggml_init.argtypes = (ggml_init_params,)
        # ggml_init returns a `struct ggml_context *`.  Without an explicit
        # restype ctypes assumes a C int and truncates the pointer on 64-bit
        # platforms.
        self.libggml.ggml_init.restype = ctypes.c_void_p

        # Keep the returned context handle instead of discarding it.
        # NOTE(review): presumably this call is what initialises ggml's
        # internal state before the conversion routines are used -- confirm.
        self._ggml_ctx = self.libggml.ggml_init(ggml_init_params(1 * 1024 * 1024, 0, False))

    def dequantize(self, tensor: np.ndarray, qtype: GGMLQuantizationType) -> np.ndarray:
        """Convert the raw bytes of a ggml tensor to float32 with libggml.

        Args:
            tensor: Array holding the raw tensor data.
            qtype: ggml type the data in ``tensor`` is encoded with.

        Returns:
            A float32 array whose shape is given by
            ``gguf.quant_shape_from_byte_shape``; for ``F32`` a float32 view
            of ``tensor`` itself.
        """
        if qtype == GGMLQuantizationType.F32:
            # no-op: reinterpret in place, no output buffer required
            return tensor.view(np.float32)

        # The C routines read one flat buffer, so the source must be contiguous.
        tensor = np.ascontiguousarray(tensor)
        result = np.zeros(gguf.quant_shape_from_byte_shape(tensor.shape, qtype), dtype=np.float32, order="C")
        dst = result.ctypes.data_as(c_float_p)

        if qtype == GGMLQuantizationType.F16:
            self.libggml.ggml_fp16_to_fp32_row(tensor.ctypes.data_as(ctypes.POINTER(ctypes.c_uint16)), dst, result.size)
        elif qtype == GGMLQuantizationType.BF16:
            self.libggml.ggml_bf16_to_fp32_row(tensor.ctypes.data_as(ctypes.POINTER(ctypes.c_uint16)), dst, result.size)
        else:
            # C symbols are lower-case except for the trailing "K" of the
            # K-quants (e.g. Q4_K -> dequantize_row_q4_K).
            lw_qname = qtype.name.lower()
            if lw_qname.endswith("k"):
                lw_qname = lw_qname[:-1] + "K"
            dequant_func = getattr(self.libggml, "dequantize_row_" + lw_qname)
            dequant_func(tensor.ctypes.data_as(ctypes.c_void_p), dst, result.size)
        return result

    def quantize(self, data: np.ndarray, qtype: GGMLQuantizationType) -> np.ndarray:
        """Quantize ``data`` to ``qtype`` with ``ggml_quantize_chunk``.

        Args:
            data: Values to quantize; the last axis is the row length.
                Converted to a C-contiguous float32 array if it is not one
                already.
            qtype: Target ggml quantization type.

        Returns:
            A uint8 array with the quantized bytes, shaped by
            ``gguf.quant_shape_to_byte_shape``.

        Raises:
            AssertionError: If libggml reports a different number of written
                bytes than the size of the output buffer.
        """
        # The pointer handed to C must address contiguous float32 memory.
        # This is a no-op for input that already satisfies that.
        data = np.ascontiguousarray(data, dtype=np.float32)
        result = np.zeros(gguf.quant_shape_to_byte_shape(data.shape, qtype), dtype=np.uint8, order="C")

        if self.libggml.ggml_quantize_requires_imatrix(qtype.value):
            # TODO: is a column-wise sum of squares appropriate?
            # Bound to a name so the buffer stays alive across the C call.
            imatrix = np.sum((data * data).reshape((-1, data.shape[-1])), axis=0)
            qw = imatrix.ctypes.data_as(c_float_p)
        else:
            imatrix = None
            qw = ctypes.cast(0, c_float_p)  # NULL: no importance matrix

        result_size = self.libggml.ggml_quantize_chunk(
            qtype.value,
            data.ctypes.data_as(c_float_p),
            result.ctypes.data_as(ctypes.c_void_p),
            0,
            prod(data.shape[:-1]),
            data.shape[-1],
            qw,
        )
        # Raised explicitly (same exception type as the former `assert`) so
        # the check is not stripped when Python runs with -O.
        if result.size != result_size:
            raise AssertionError(
                f"ggml_quantize_chunk wrote {result_size} bytes, expected {result.size}"
            )
        return result
103
+
104
+
105
def create_sample(ggml_quants: GGMLQuants, hidden_size: int, qtype: GGMLQuantizationType) -> None:
    """Write a GGUF file of random tensors quantized to ``qtype``.

    The file is named ``Quant_<qtype.name>_<hidden_size>.gguf`` and is
    created relative to the current working directory.  It holds one tensor
    of shape ``(size, hidden_size)`` for each of a fixed list of sizes, drawn
    from ``np.random.randn`` and quantized through ``ggml_quants``.

    Args:
        ggml_quants: Wrapper used to quantize each tensor.
        hidden_size: Length of the last axis of every generated tensor.
        qtype: Quantization type applied to every tensor.
    """
    gguf_writer = gguf.GGUFWriter(f"Quant_{qtype.name}_{hidden_size}.gguf", "llama")

    try:
        # Create the sample tensors
        for size in (768, 1024, 2048, 5120, 18944):
            tensor = np.random.randn(size, hidden_size).astype(np.float32)
            shape_str = "x".join(map(str, tensor.shape))

            gguf_writer.add_tensor(f"tensor_{qtype.name}_{shape_str}", ggml_quants.quantize(tensor, qtype), raw_dtype=qtype)

        gguf_writer.write_header_to_file()
        gguf_writer.write_kv_data_to_file()
        gguf_writer.write_tensors_to_file()
    finally:
        # Close the writer even when quantization or writing fails.
        gguf_writer.close()
120
+
121
+
122
if __name__ == "__main__":
    # Command-line entry point: quantize random tensors with libggml and
    # write one sample GGUF file per quantization type listed below.
    parser = argparse.ArgumentParser(description="Create sample GGUF files quantized with the reference C implementation")
    parser.add_argument("--libggml", type=Path, default="libggml.so", help="The path to libggml.so")
    parser.add_argument("--hidden_size", type=int, default=256, help="The hidden size of the sample tensor")
    parser.add_argument("--seed", type=int, default=0, help="The seed for NumPy's random number generator")

    args = parser.parse_args()

    # Seed only after parsing so that --seed is honoured.  The default of 0
    # reproduces the previously hard-coded seed.
    np.random.seed(args.seed)

    logging.basicConfig(level=logging.DEBUG)

    ggml_quants = GGMLQuants(args.libggml)

    qtypes = [
        GGMLQuantizationType.IQ1_M,
        GGMLQuantizationType.IQ1_S,
        GGMLQuantizationType.IQ2_S,
        GGMLQuantizationType.IQ2_XS,
        GGMLQuantizationType.IQ2_XXS,
        GGMLQuantizationType.IQ3_S,
        GGMLQuantizationType.IQ3_XXS,
        GGMLQuantizationType.IQ4_NL,
        GGMLQuantizationType.IQ4_XS,
        GGMLQuantizationType.Q2_K,
        GGMLQuantizationType.Q3_K,
        GGMLQuantizationType.Q4_K,
        GGMLQuantizationType.Q5_K,
        GGMLQuantizationType.Q6_K,
        GGMLQuantizationType.Q4_0,
        GGMLQuantizationType.Q5_0,
        GGMLQuantizationType.Q8_0,
    ]

    for qtype in qtypes:
        create_sample(ggml_quants, args.hidden_size, qtype)