diff --git a/cache_autogptq_cuda_256.cpp b/cache_autogptq_cuda_256.cpp new file mode 100644 index 0000000000000000000000000000000000000000..19e162917862e145daab36346c2e8db87508b81a --- /dev/null +++ b/cache_autogptq_cuda_256.cpp @@ -0,0 +1,199 @@ +#include +#include +#include + +// adapted from https://github.com/PanQiWei/AutoGPTQ/blob/main/autogptq_extension/cuda_256/autogptq_cuda_256.cpp +void vecquant8matmul_cuda( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros, + torch::Tensor g_idx +); + +void vecquant8matmul( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros, + torch::Tensor g_idx +) { + const at::cuda::OptionalCUDAGuard device_guard(device_of(vec)); + vecquant8matmul_cuda(vec, mat, mul, scales, zeros, g_idx); +} + +void vecquant8matmul_batched_cuda( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +); + +void vecquant8matmul_batched( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +) { + const at::cuda::OptionalCUDAGuard device_guard(device_of(vec)); + vecquant8matmul_batched_cuda(vec, mat, mul, scales, zeros); +} + +void vecquant8matmul_batched_column_compression_cuda( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +); + +void vecquant8matmul_batched_column_compression( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +) { + const at::cuda::OptionalCUDAGuard device_guard(device_of(vec)); + vecquant8matmul_batched_column_compression_cuda(vec, mat, mul, scales, zeros); +} + +void vecquant4matmul_batched_cuda( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +); + +void vecquant4matmul_batched( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +) { + const at::cuda::OptionalCUDAGuard device_guard(device_of(vec)); + vecquant4matmul_batched_cuda(vec, mat, mul, scales, zeros); +} + +void vecquant4matmul_batched_column_compression_cuda( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +); + +void vecquant4matmul_batched_column_compression( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +) { + const at::cuda::OptionalCUDAGuard device_guard(device_of(vec)); + vecquant4matmul_batched_column_compression_cuda(vec, mat, mul, scales, zeros); +} + +void vecquant8matmul_batched_old_cuda( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +); + +void vecquant8matmul_batched_old( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +) { + const at::cuda::OptionalCUDAGuard device_guard(device_of(vec)); + vecquant8matmul_batched_old_cuda(vec, mat, mul, scales, zeros); +} + + +void vecquant4matmul_batched_old_cuda( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +); + +void vecquant4matmul_batched_old( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +) { + const at::cuda::OptionalCUDAGuard device_guard(device_of(vec)); + vecquant4matmul_batched_old_cuda(vec, mat, mul, scales, zeros); +} + +void vecquant8matmul_batched_column_compression_old_cuda( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +); + +void vecquant8matmul_batched_column_compression_old( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +) { + const at::cuda::OptionalCUDAGuard device_guard(device_of(vec)); + vecquant8matmul_batched_column_compression_old_cuda(vec, mat, mul, scales, zeros); +} + +void vecquant4matmul_batched_column_compression_old_cuda( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +); + +void vecquant4matmul_batched_column_compression_old( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +) { + const at::cuda::OptionalCUDAGuard device_guard(device_of(vec)); + vecquant4matmul_batched_column_compression_old_cuda(vec, mat, mul, scales, zeros); +} + + + +void vecquant8matmul_batched_faster_cuda( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +); + +void vecquant8matmul_batched_faster( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +) { + const at::cuda::OptionalCUDAGuard device_guard(device_of(vec)); + vecquant8matmul_batched_faster_cuda(vec, mat, mul, scales, zeros); +} + + +void vecquant8matmul_batched_faster_old_cuda( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +); + +void vecquant8matmul_batched_faster_old( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +) { + const at::cuda::OptionalCUDAGuard device_guard(device_of(vec)); + vecquant8matmul_batched_faster_old_cuda(vec, mat, mul, scales, zeros); +} + +void vecquant8matmul_batched_column_compression_faster_cuda( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +); + +void vecquant8matmul_batched_column_compression_faster( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +) { + const at::cuda::OptionalCUDAGuard device_guard(device_of(vec)); + vecquant8matmul_batched_column_compression_faster_cuda(vec, mat, mul, scales, zeros); +} + + +void vecquant8matmul_batched_column_compression_faster_old_cuda( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +); + +void vecquant8matmul_batched_column_compression_faster_old( + torch::Tensor vec, torch::Tensor mat, torch::Tensor mul, + torch::Tensor scales, torch::Tensor zeros +) { + const at::cuda::OptionalCUDAGuard device_guard(device_of(vec)); + vecquant8matmul_batched_column_compression_faster_old_cuda(vec, mat, mul, scales, zeros); +} + + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + m.def("vecquant8matmul", &vecquant8matmul, "Vector 8-bit Quantized Matrix Multiplication (CUDA) (desc_act)"); + m.def("vecquant8matmul_batched", &vecquant8matmul_batched, "Vector 8-bit Batched Quantized Matrix Multiplication (CUDA) (desc_act)"); + m.def("vecquant8matmul_batched_old", &vecquant8matmul_batched_old, "Vector 8-bit old Batched Quantized Matrix Multiplication (CUDA) (desc_act)"); + m.def("vecquant8matmul_batched_faster", &vecquant8matmul_batched_faster, "Vector 8-bit old Batched Quantized Matrix Multiplication (CUDA) (desc_act)"); + m.def("vecquant8matmul_batched_faster_old", &vecquant8matmul_batched_faster_old, "Vector 8-bit old Batched Quantized Matrix Multiplication (CUDA) (desc_act)"); + m.def("vecquant4matmul_batched_old", &vecquant4matmul_batched_old, "Vector 4-bit old Batched Quantized Matrix Multiplication (CUDA) (desc_act)"); + m.def("vecquant8matmul_batched_column_compression", &vecquant8matmul_batched_column_compression, "Vector 8-bit Batched Quantized Matrix Multiplication (CUDA) with weight's column compressed (desc_act)"); + m.def("vecquant8matmul_batched_column_compression_old", &vecquant8matmul_batched_column_compression_old, "Vector old 8-bit Batched Quantized Matrix Multiplication (CUDA) with weight's column compressed (desc_act)"); + m.def("vecquant8matmul_batched_column_compression_faster", &vecquant8matmul_batched_column_compression_faster, "Vector old 8-bit Batched Quantized Matrix Multiplication (CUDA) with weight's column compressed (desc_act)"); + m.def("vecquant8matmul_batched_column_compression_faster_old", &vecquant8matmul_batched_column_compression_faster_old, "Vector old 8-bit Batched Quantized Matrix Multiplication (CUDA) with weight's column compressed (desc_act)"); + m.def("vecquant4matmul_batched_column_compression_old", &vecquant4matmul_batched_column_compression_old, "Vector old 4-bit Batched Quantized Matrix Multiplication (CUDA) with weight's column compressed (desc_act)"); + m.def("vecquant4matmul_batched", &vecquant4matmul_batched, "Vector 4-bit Batched Quantized Matrix Multiplication (CUDA) (desc_act)"); + m.def("vecquant4matmul_batched_column_compression", &vecquant4matmul_batched_column_compression, "Vector 4-bit Batched Quantized Matrix Multiplication (CUDA) with weight's column compressed (desc_act)"); +} + diff --git a/cache_autogptq_cuda_kernel_256.cu b/cache_autogptq_cuda_kernel_256.cu new file mode 100644 index 0000000000000000000000000000000000000000..86b53f3cd96e0010a1f83804f496cd88d170f9c9 --- /dev/null +++ b/cache_autogptq_cuda_kernel_256.cu @@ -0,0 +1,1709 @@ +#define _CRT_SECURE_NO_WARNINGS +#include +#include +#include +#include +#include +#include + +#if (defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700) || defined(USE_ROCM) +// adapted from https://github.com/PanQiWei/AutoGPTQ/blob/main/autogptq_extension/cuda_256/autogptq_cuda_kernel_256.cu +__device__ __forceinline__ void atomicAdd(c10::Half* address, c10::Half val) { + unsigned int *address_as_ui = reinterpret_cast(reinterpret_cast(address) - (reinterpret_cast(address) & 2)); + unsigned int old = *address_as_ui; + unsigned int assumed; + + do { + assumed = old; + unsigned short hsum = reinterpret_cast(address) & 2 ? (old >> 16) : (old & 0xffff); + hsum += val; + old = reinterpret_cast(address) & 2 + ? (old & 0xffff) | (hsum << 16) + : (old & 0xffff0000) | hsum; + old = atomicCAS(address_as_ui, assumed, old); + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); +} +__device__ __forceinline__ void atomicAdd(__half* address, c10::Half val) { + unsigned int * address_as_ui = (unsigned int *) ((char *)address - ((size_t)address & 2)); + unsigned int old = *address_as_ui; + unsigned int assumed; + + do { + assumed = old; + __half_raw hsum; + hsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff); + half tmpres = __hadd(hsum, val); + hsum = __half_raw(tmpres); + old = (size_t)address & 2 ? (old & 0xffff) | (hsum.x << 16) : (old & 0xffff0000) | hsum.x; + old = atomicCAS(address_as_ui, assumed, old); + } while (assumed != old); +} +#endif + +template +__global__ void VecQuant8MatMulKernel( + const scalar_t* __restrict__ vec, + const int* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const int* __restrict__ zeros, + const int* __restrict__ g_idx, + int batch, + int vec_height, + int height, + int width, + int zero_width +); + +template +__global__ void VecQuant8BatchMatMulColumnCompressionKernel( + const scalar_t* __restrict__ vec, + const int* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const int* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int height, + int width +); + +template +__global__ void VecQuant4BatchMatMulColumnCompressionKernel( + const scalar_t* __restrict__ vec, + const int* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const int* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int height, + int width +); + +template +__global__ void VecQuant8BatchMatMulKernel( + const scalar_t* __restrict__ vec, + const int* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const int* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int vec_height, + int height, + int width, + int zero_width +); + +template +__global__ void VecQuant4BatchMatMulKernel( + const scalar_t* __restrict__ vec, + const int* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const int* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int vec_height, + int height, + int width, + int zero_width +); + + + +template +__global__ void VecQuant8BatchMatMulKernel_old( + const scalar_t* __restrict__ vec, + const uint8_t* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const scalar_t* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int vec_height, + int height, + int width, + int zero_width +); + +__global__ void VecQuant8BatchMatMulKernel_faster( + const half* __restrict__ vec, + const uint8_t* __restrict__ mat, + half* __restrict__ mul, + const half* __restrict__ scales, + const half* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int vec_height, + int height, + int width, + int zero_width +); + + + +__global__ void VecQuant8BatchMatMulKernel_faster_old( + const half* __restrict__ vec, + const uint8_t* __restrict__ mat, + half* __restrict__ mul, + const half* __restrict__ scales, + const half* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int vec_height, + int height, + int width +); + + +template +__global__ void VecQuant4BatchMatMulKernel_old( + const scalar_t* __restrict__ vec, + const uint8_t* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const scalar_t* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int vec_height, + int height, + int width, + int zero_width +); + + +template +__global__ void VecQuant8BatchMatMulColumnCompressionKernel_old( + const scalar_t* __restrict__ vec, + const uint8_t* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const scalar_t* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int height, + int width +); + +__global__ void VecQuant8BatchMatMulColumnCompressionKernel_faster( + const half* __restrict__ vec, + const uint8_t* __restrict__ mat, + half* __restrict__ mul, + const half* __restrict__ scales, + const half* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int height, + int width +); + +__global__ void VecQuant8BatchMatMulColumnCompressionKernel_faster_old( + const half* __restrict__ vec, + const uint8_t* __restrict__ mat, + half* __restrict__ mul, + const half* __restrict__ scales, + const half* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int height, + int width +); + + +template +__global__ void VecQuant4BatchMatMulColumnCompressionKernel_old( + const scalar_t* __restrict__ vec, + const uint8_t* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const scalar_t* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int height, + int width +); + + +__global__ void VecQuant8BatchMatMulKernel_faster( + const half* __restrict__ vec, + const uint8_t* __restrict__ mat, + half* __restrict__ mul, + const half* __restrict__ scales, + const half* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int vec_height, + int height, + int width +); + + +__global__ void VecQuant8BatchMatMulColumnCompressionKernel_faster( + const half* __restrict__ vec, + const uint8_t* __restrict__ mat, + half* __restrict__ mul, + const half* __restrict__ scales, + const half* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int height, + int width +); + +const int BLOCKWIDTH = 128; +const int BLOCKHEIGHT8 = 32; +const int BLOCKHEIGHT4 = 16; +const int BLOCKHEIGHT_OLD4 = 128; +//const int BLOCKHEIGHT_OLD8 = 128; + +__device__ inline unsigned int as_unsigned(int i) { + return *reinterpret_cast(&i); +} + +__device__ inline int as_int(int i) { + return *reinterpret_cast(&i); +} + +void vecquant8matmul_batched_column_compression_cuda( + torch::Tensor vec, + torch::Tensor mat, + torch::Tensor mul, + torch::Tensor scales, + torch::Tensor zeros +) { + int batch = vec.size(0); + int heads = vec.size(1); + int vec_row = vec.size(2); + int height = vec.size(3); + int width = mat.size(3) * 4; + + dim3 blocks( + (height + BLOCKWIDTH - 1) / BLOCKWIDTH, + (width + BLOCKWIDTH - 1) / BLOCKWIDTH + ); + dim3 threads(BLOCKWIDTH); + + AT_DISPATCH_FLOATING_TYPES( + vec.type(), "vecquant8matmul_batched_cuda", ([&] { + VecQuant8BatchMatMulColumnCompressionKernel<<>>( + vec.data(), mat.data(), mul.data(), + scales.data(), zeros.data(), + batch, heads, vec_row, height, width + ); + }) + ); + +} + +template +__global__ void VecQuant8BatchMatMulColumnCompressionKernel( + const scalar_t* __restrict__ vec, + const int* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const int* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int height, + int width +) { + int weight_total = batch * heads * height * width / 4; + int input_total = batch * heads * vec_row * height; + int out_total = batch * heads * vec_row * width; + int tid = threadIdx.x; + // h is index of height with step being BLOCKWIDTH + int h = BLOCKWIDTH * blockIdx.x; + // w is index of width with step being 1 + int w = BLOCKWIDTH * blockIdx.y + tid; + if (w >= width && tid >= height) { + return; + } + + __shared__ scalar_t blockvec[BLOCKWIDTH]; + int k; + scalar_t w_tmp; + + float weight[BLOCKWIDTH]; + + for (int b = 0; b < batch; ++b){ + for (int head = 0; head < heads; ++head){ + int batch_shift = b * heads + head; + for (k = 0; k < BLOCKWIDTH && h + k < height; ++k){ + int i_w = (w / 4); + int w_bit = (w % 4) * 8; + + int w_index = (batch_shift * height + h + k) * width / 4 + i_w; + if (w_index >= weight_total || w >= width) { + weight[k] = 0; + } else { + scalar_t scale = scales[batch_shift * height + h + k]; + scalar_t zero = zeros[batch_shift * height + h + k]; + w_tmp = ((as_unsigned(mat[w_index]) >> w_bit) & 0xFF); + weight[k] = scale * (w_tmp - zero); + } + } + + scalar_t res; + for (int vr = 0; vr < vec_row; ++vr){ + res = 0; + int vec_index = (batch_shift * vec_row + vr) * height + blockIdx.x * BLOCKWIDTH + tid; + if (vec_index < input_total) { + blockvec[tid] = vec[vec_index]; + } else { + blockvec[tid] = 0; + } + + __syncthreads(); + for (k = 0; k < BLOCKWIDTH && h + k < height; ++k){ + // res is the dot product of BLOCKWIDTH elements (part of width) + res += weight[k] * blockvec[k]; + } + // add res to the final result, final matrix shape: (batch, vec_row, width) + int out_index = (batch_shift * vec_row + vr) * width + w; + if (out_index < out_total) { + atomicAdd(&mul[out_index], res); + } + __syncthreads(); + } + } + } +} + +void vecquant8matmul_batched_cuda( + torch::Tensor vec, + torch::Tensor mat, + torch::Tensor mul, + torch::Tensor scales, + torch::Tensor zeros +) { + int batch = vec.size(0); + int heads = vec.size(1); + int vec_row = vec.size(2); + int vec_height = vec.size(3); + int height = mat.size(2); + int width = mat.size(3); + int zero_width = zeros.size(2); + + dim3 blocks( + (height + BLOCKHEIGHT8 - 1) / BLOCKHEIGHT8, + (width + BLOCKWIDTH - 1) / BLOCKWIDTH + ); + dim3 threads(BLOCKWIDTH); + + AT_DISPATCH_FLOATING_TYPES( + vec.type(), "vecquant8matmul_batched_cuda", ([&] { + VecQuant8BatchMatMulKernel<<>>( + vec.data(), mat.data(), mul.data(), + scales.data(), zeros.data(), + batch, heads, vec_row, vec_height, height, width, zero_width + ); + }) + ); + +} + +template +__global__ void VecQuant8BatchMatMulKernel( + const scalar_t* __restrict__ vec, + const int* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const int* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int vec_height, + int height, + int width, + int zero_width +) { + int weight_total = batch * heads * height * width; + int input_total = batch * heads * vec_row * vec_height; + int out_total = batch * heads * vec_row * width; + int tid = threadIdx.x; + // h is index of height with step being BLOCKHEIGHT8 + int h = BLOCKHEIGHT8 * blockIdx.x; + // w is index of width with step being 1 + int w = BLOCKWIDTH * blockIdx.y + tid; + if (w >= width && tid >= vec_height) { + return; + } + + __shared__ scalar_t blockvec[BLOCKWIDTH]; + // i is index of mat of block first row + int i = width * h + w; + // if (i >= width * height) { + // return; + // } + int k; + scalar_t w_tmp; + + int z_w = w / 4; + int z_mod = (w % 4) * 8; + + float weight[BLOCKWIDTH]; + + for (int b = 0; b < batch; ++b){ + for (int head = 0; head < heads; ++head){ + int batch_shift = b * heads + head; + for (k = 0; k < BLOCKWIDTH && h * 4 + k < vec_height; ++k){ + int k_w = (k / 4); + int k_bit = (k % 4) * 8; + + int w_index = batch_shift * height * width + i + (k_w * width); + if (w_index >= weight_total || w >= width) { + weight[k] = 0; + } else { + scalar_t scale = scales[batch_shift * width + w]; + scalar_t zero; + if (zero_width == width) { + zero = zeros[batch_shift * width + w]; + } else { + zero = scalar_t(((as_unsigned(zeros[batch_shift * zero_width + z_w]) >> z_mod) & 0xFF) + 1); + } + w_tmp = ((as_unsigned(mat[w_index]) >> k_bit) & 0xFF); + weight[k] = scale * (w_tmp - zero); + } + } + + scalar_t res; + for (int vr = 0; vr < vec_row; ++vr){ + res = 0; + int vec_index = (batch_shift * vec_row + vr) * vec_height + blockIdx.x * BLOCKWIDTH + tid; + if (vec_index < input_total) { + blockvec[tid] = vec[vec_index]; + } else { + blockvec[tid] = 0; + } + + __syncthreads(); + for (k = 0; k < BLOCKWIDTH && h * 4 + k < vec_height; ++k){ + // res is the dot product of BLOCKWIDTH elements (part of width) + res += weight[k] * blockvec[k]; + } + // add res to the final result, final matrix shape: (batch, vec_row, width) + int out_index = (batch_shift * vec_row + vr) * width + w; + if (out_index < out_total) { + atomicAdd(&mul[out_index], res); + } + __syncthreads(); + } + } + } +} + + +void vecquant8matmul_cuda( + torch::Tensor vec, + torch::Tensor mat, + torch::Tensor mul, + torch::Tensor scales, + torch::Tensor zeros, + torch::Tensor g_idx +) { + int batch = vec.size(0); + int vec_height = vec.size(1); + int height = mat.size(0); + int width = mat.size(1); + int zero_width = zeros.size(1); + + dim3 blocks( + (height + BLOCKHEIGHT8 - 1) / BLOCKHEIGHT8, + (width + BLOCKWIDTH - 1) / BLOCKWIDTH + ); + dim3 threads(BLOCKWIDTH); + + AT_DISPATCH_FLOATING_TYPES( + vec.type(), "vecquant8matmul_cuda", ([&] { + VecQuant8MatMulKernel<<>>( + vec.data(), mat.data(), mul.data(), + scales.data(), zeros.data(), g_idx.data(), + batch, vec_height, height, width, zero_width + ); + }) + ); +} + +template +__global__ void VecQuant8MatMulKernel( + const scalar_t* __restrict__ vec, + const int* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const int* __restrict__ zeros, + const int* __restrict__ g_idx, + int batch, + int vec_height, + int height, + int width, + int zero_width +) { + int h = BLOCKHEIGHT8 * blockIdx.x; + int w = BLOCKWIDTH * blockIdx.y + threadIdx.x; + + __shared__ scalar_t blockvec[BLOCKWIDTH]; + int i = width * h + w; + int g_h = h * 4; + int k; + unsigned int g; + scalar_t w_tmp; + + int z_w = w / 4; + int z_mod = (w % 4) * 8; + + float weight[BLOCKWIDTH]; + + for (k = 0; k < BLOCKWIDTH; ++k){ + int k_w = (k / 4); + int k_bit = (k % 4) * 8; + + g = as_int(g_idx[g_h + k]); + scalar_t scale = scales[g * width + w]; + scalar_t zero = scalar_t(((as_unsigned(zeros[g * zero_width + z_w]) >> z_mod) & 0xFF) + 1); + + w_tmp = ((as_unsigned(mat[i + (k_w * width)]) >> k_bit) & 0xFF); + + weight[k] = scale * (w_tmp - zero); + } + + + scalar_t res; + for (int b = 0; b < batch; ++b){ + res = 0; + blockvec[threadIdx.x] = vec[b * vec_height + blockIdx.x * BLOCKWIDTH + threadIdx.x]; + __syncthreads(); + for (k = 0; k < BLOCKWIDTH; ++k){ + res += weight[k] * blockvec[k]; + } + atomicAdd(&mul[b * width + w], res); + __syncthreads(); + } +} + + + +void vecquant4matmul_batched_cuda( + torch::Tensor vec, + torch::Tensor mat, + torch::Tensor mul, + torch::Tensor scales, + torch::Tensor zeros +) { + int batch = vec.size(0); + int heads = vec.size(1); + int vec_row = vec.size(2); + int vec_height = vec.size(3); + int height = mat.size(2); + int width = mat.size(3); + int zero_width = zeros.size(2); + + dim3 blocks( + (height + BLOCKHEIGHT4 - 1) / BLOCKHEIGHT4, + (width + BLOCKWIDTH - 1) / BLOCKWIDTH + ); + dim3 threads(BLOCKWIDTH); + + AT_DISPATCH_FLOATING_TYPES( + vec.type(), "vecquant4matmul_batched_cuda", ([&] { + VecQuant4BatchMatMulKernel<<>>( + vec.data(), mat.data(), mul.data(), + scales.data(), zeros.data(), + batch, heads, vec_row, vec_height, height, width, zero_width + ); + }) + ); + +} + +template +__global__ void VecQuant4BatchMatMulKernel( + const scalar_t* __restrict__ vec, + const int* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const int* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int vec_height, + int height, + int width, + int zero_width +) { + int weight_total = batch * heads * height * width; + int input_total = batch * heads * vec_row * vec_height; + int out_total = batch * heads * vec_row * width; + int tid = threadIdx.x; + // h is index of height with step being BLOCKHEIGHT4 + int h = BLOCKHEIGHT4 * blockIdx.x; + // w is index of width with step being 1 + int w = BLOCKWIDTH * blockIdx.y + tid; + if (w >= width && tid >= vec_height) { + return; + } + + __shared__ scalar_t blockvec[BLOCKWIDTH]; + // i is index of mat of block first row + int i = width * h + w; + int k; + scalar_t w_tmp; + + int z_w = w / 8; + int z_mod = (w % 8) * 4; + + float weight[BLOCKWIDTH]; + + for (int b = 0; b < batch; ++b){ + for (int head = 0; head < heads; ++head){ + int batch_shift = b * heads + head; + for (k = 0; k < BLOCKWIDTH && h * 8 + k < vec_height; ++k){ + int k_w = (k / 8); + int k_bit = (k % 8) * 4; + + int w_index = batch_shift * height * width + i + (k_w * width); + if (w_index >= weight_total || w >= width) { + weight[k] = 0; + } else { + scalar_t scale = scales[batch_shift * width + w]; + scalar_t zero; + if (zero_width == width) { + zero = zeros[batch_shift * width + w]; + } else { + zero = scalar_t(((as_unsigned(zeros[batch_shift * zero_width + z_w]) >> z_mod) & 0xF)); + } + w_tmp = ((as_unsigned(mat[w_index]) >> k_bit) & 0xF); + weight[k] = scale * (w_tmp - zero); + } + } + + scalar_t res; + for (int vr = 0; vr < vec_row; ++vr){ + res = 0; + int vec_index = (batch_shift * vec_row + vr) * vec_height + blockIdx.x * BLOCKWIDTH + tid; + if (vec_index < input_total) { + blockvec[tid] = vec[vec_index]; + } else { + blockvec[tid] = 0; + } + + __syncthreads(); + for (k = 0; k < BLOCKWIDTH && h * 8 + k < vec_height; ++k){ + // res is the dot product of BLOCKWIDTH elements (part of width) + res += weight[k] * blockvec[k]; + } + // add res to the final result, final matrix shape: (batch, vec_row, width) + int out_index = (batch_shift * vec_row + vr) * width + w; + if (out_index < out_total) { + atomicAdd(&mul[out_index], res); + } + __syncthreads(); + } + } + } +} + + + +void vecquant4matmul_batched_column_compression_cuda( + torch::Tensor vec, + torch::Tensor mat, + torch::Tensor mul, + torch::Tensor scales, + torch::Tensor zeros +) { + int batch = vec.size(0); + int heads = vec.size(1); + int vec_row = vec.size(2); + int height = vec.size(3); + int width = mat.size(3) * 8; + + dim3 blocks( + (height + BLOCKWIDTH - 1) / BLOCKWIDTH, + (width + BLOCKWIDTH - 1) / BLOCKWIDTH + ); + dim3 threads(BLOCKWIDTH); + + AT_DISPATCH_FLOATING_TYPES( + vec.type(), "vecquant4matmul_batched_cuda", ([&] { + VecQuant4BatchMatMulColumnCompressionKernel<<>>( + vec.data(), mat.data(), mul.data(), + scales.data(), zeros.data(), + batch, heads, vec_row, height, width + ); + }) + ); + +} + +template +__global__ void VecQuant4BatchMatMulColumnCompressionKernel( + const scalar_t* __restrict__ vec, + const int* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const int* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int height, + int width +) { + int weight_total = batch * heads * height * width / 8; + int input_total = batch * heads * vec_row * height; + int out_total = batch * heads * vec_row * width; + int tid = threadIdx.x; + // h is index of height with step being BLOCKWIDTH + int h = BLOCKWIDTH * blockIdx.x; + // w is index of width with step being 1 + int w = BLOCKWIDTH * blockIdx.y + tid; + if (w >= width && tid >= height) { + return; + } + + __shared__ scalar_t blockvec[BLOCKWIDTH]; + int k; + scalar_t w_tmp; + + float weight[BLOCKWIDTH]; + + for (int b = 0; b < batch; ++b){ + for (int head = 0; head < heads; ++head){ + int batch_shift = b * heads + head; + for (k = 0; k < BLOCKWIDTH && h + k < height; ++k){ + int i_w = (w / 8); + int w_bit = (w % 8) * 4; + + int w_index = (batch_shift * height + h + k) * width / 8 + i_w; + if (w_index >= weight_total || w >= width) { + weight[k] = 0; + } else { + scalar_t scale = scales[batch_shift * height + h + k]; + scalar_t zero = zeros[batch_shift * height + h + k]; + w_tmp = ((as_unsigned(mat[w_index]) >> w_bit) & 0xF); + weight[k] = scale * (w_tmp - zero); + } + } + + scalar_t res; + for (int vr = 0; vr < vec_row; ++vr){ + res = 0; + int vec_index = (batch_shift * vec_row + vr) * height + blockIdx.x * BLOCKWIDTH + tid; + if (vec_index < input_total) { + blockvec[tid] = vec[vec_index]; + } else { + blockvec[tid] = 0; + } + + __syncthreads(); + for (k = 0; k < BLOCKWIDTH && h + k < height; ++k){ + // res is the dot product of BLOCKWIDTH elements (part of width) + res += weight[k] * blockvec[k]; + } + // add res to the final result, final matrix shape: (batch, vec_row, width) + int out_index = (batch_shift * vec_row + vr) * width + w; + if (out_index < out_total) { + atomicAdd(&mul[out_index], res); + } + __syncthreads(); + } + } + } +} + + +void vecquant8matmul_batched_old_cuda( + torch::Tensor vec, + torch::Tensor mat, + torch::Tensor mul, + torch::Tensor scales, + torch::Tensor zeros +) { + int batch = vec.size(0); + int heads = vec.size(1); + int vec_row = vec.size(2); + int vec_height = vec.size(3); + int height = mat.size(2); + int width = mat.size(3); + int zero_width = zeros.size(2); + + dim3 blocks( + (height + BLOCKWIDTH - 1) / BLOCKWIDTH, + (width + BLOCKWIDTH - 1) / BLOCKWIDTH + ); + dim3 threads(BLOCKWIDTH); + + AT_DISPATCH_FLOATING_TYPES( + vec.type(), "vecquant8matmul_batched_old_cuda", ([&] { + VecQuant8BatchMatMulKernel_old<<>>( + vec.data(), mat.data(), mul.data(), + scales.data(), zeros.data(), + batch, heads, vec_row, vec_height, height, width, zero_width + ); + }) + ); +} + + +template +__global__ void VecQuant8BatchMatMulKernel_old( + const scalar_t* __restrict__ vec, + const uint8_t* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const scalar_t* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int vec_height, + int height, + int width, + int zero_width +) { + int weight_total = batch * heads * height * width; + int input_total = batch * heads * vec_row * vec_height; + int out_total = batch * heads * vec_row * width; + int tid = threadIdx.x; + // h is index of height with step being BLOCKHEIGHT8 + int h = BLOCKWIDTH * blockIdx.x; + // w is index of width with step being 1 + int w = BLOCKWIDTH * blockIdx.y + tid; + if (w >= width && tid >= vec_height) { + return; + } + + __shared__ scalar_t blockvec[BLOCKWIDTH]; + // i is index of mat of block first row + int i = width * h + w; + int k; + scalar_t w_tmp; + + float weight[BLOCKWIDTH]; + for (int b = 0; b < batch; ++b){ + for (int head = 0; head < heads; ++head){ + int batch_shift = b * heads + head; + for (k = 0; k < BLOCKWIDTH && h + k < vec_height; ++k){ + int k_w = k; + int w_index = batch_shift * height * width + i + (k_w * width); + if (w_index >= weight_total || w >= width) { + weight[k] = 0; + } else { + scalar_t scale = scales[batch_shift * width + w]; + scalar_t zero = zeros[batch_shift * width + w]; + w_tmp = as_unsigned(mat[w_index]); + weight[k] = scale * (w_tmp - zero); + } + } + + scalar_t res; + for (int vr = 0; vr < vec_row; ++vr){ + res = 0; + int vec_index = (batch_shift * vec_row + vr) * vec_height + blockIdx.x * BLOCKWIDTH + tid; + if (vec_index < input_total) { + blockvec[tid] = vec[vec_index]; + } else { + blockvec[tid] = 0; + } + + __syncthreads(); + for (k = 0; k < BLOCKWIDTH && h + k < vec_height; ++k){ + // res is the dot product of BLOCKWIDTH elements (part of width) + res += weight[k] * blockvec[k]; + } + // add res to the final result, final matrix shape: (batch, vec_row, width) + int out_index = (batch_shift * vec_row + vr) * width + w; + if (out_index < out_total) { + atomicAdd(&mul[out_index], res); + } + __syncthreads(); + } + } + } +} + + + +void vecquant8matmul_batched_faster_cuda( + torch::Tensor vec, + torch::Tensor mat, + torch::Tensor mul, + torch::Tensor scales, + torch::Tensor zeros +) { + int batch = vec.size(0); + int heads = vec.size(1); + int vec_row = vec.size(2); + int vec_height = vec.size(3); + int height = mat.size(2); + int width = mat.size(3); + int zero_width = zeros.size(2); + + dim3 blocks( + (height + BLOCKWIDTH - 1) / BLOCKWIDTH, + (width + BLOCKWIDTH - 1) / BLOCKWIDTH + ); + dim3 threads(BLOCKWIDTH); + + VecQuant8BatchMatMulKernel_faster<<>>( + (half*) vec.data_ptr(), + (uint8_t*) mat.data_ptr(), + (half*) mul.data_ptr(), + (half*) scales.data_ptr(), + (half*) zeros.data_ptr(), + batch, heads, vec_row, vec_height, height, width, zero_width + ); +} + + + +__global__ void VecQuant8BatchMatMulKernel_faster( + const half* __restrict__ vec, + const uint8_t* __restrict__ mat, + half* __restrict__ mul, + const half* __restrict__ scales, + const half* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int vec_height, + int height, + int width, + int zero_width +) { + //int weight_total = batch * heads * height * width; + int input_total = batch * heads * vec_row * vec_height; + int out_total = batch * heads * vec_row * width; + int tid = threadIdx.x; + int h = BLOCKWIDTH * blockIdx.x; + int w = BLOCKWIDTH * blockIdx.y + tid; + if (w >= width && tid >= height) { + return; + } + + __shared__ float blockvec[BLOCKWIDTH]; + int i = width * h + w; + int k; + float w_tmp; + + float weight[BLOCKWIDTH]; + for (int b = 0; b < batch; ++b){ + for (int head = 0; head < heads; ++head){ + int batch_shift = b * heads + head; + for (k = 0; k < BLOCKWIDTH && h + k < vec_height; ++k){ + int k_w = k; + int w_index = batch_shift * height * width + i + (k_w * width); + float scale = __half2float(scales[batch_shift * width + w]); + float zero = __half2float(zeros[batch_shift * width + w]); + w_tmp = as_unsigned(mat[w_index]); + weight[k] = scale *(w_tmp-zero); + } + + float res; + for (int vr = 0; vr < vec_row; ++vr){ + res = 0; + int vec_index = (batch_shift * vec_row + vr) * vec_height + blockIdx.x * BLOCKWIDTH + tid; + if (vec_index < input_total) { + blockvec[tid] = __half2float(vec[vec_index]); + } else { + blockvec[tid] = 0; + } + __syncthreads(); + for (k = 0; k < BLOCKWIDTH && h + k < vec_height; ++k){ + float temp_res = weight[k]*blockvec[k]; + res += temp_res; + } + int out_index = (batch_shift * vec_row + vr) * width + w; + if (out_index < out_total) { + atomicAdd(&mul[out_index], __float2half(res)); + } + __syncthreads(); + } + } + } +} + + + + +void vecquant8matmul_batched_column_compression_faster_cuda( + torch::Tensor vec, + torch::Tensor mat, + torch::Tensor mul, + torch::Tensor scales, + torch::Tensor zeros +) { + int batch = vec.size(0); + int heads = vec.size(1); + int vec_row = vec.size(2); + int height = vec.size(3); + int width = mat.size(3); + + dim3 blocks( + (height + BLOCKWIDTH - 1) / BLOCKWIDTH, + (width + BLOCKWIDTH - 1) / BLOCKWIDTH + ); + dim3 threads(BLOCKWIDTH); + + VecQuant8BatchMatMulColumnCompressionKernel_faster<<>>( + (half*) vec.data_ptr(), + (uint8_t*) mat.data_ptr(), + (half*) mul.data_ptr(), + (half*) scales.data_ptr(), + (half*) zeros.data_ptr(), + batch, heads, vec_row, height, width + ); + +} + +__global__ void VecQuant8BatchMatMulColumnCompressionKernel_faster( + const half* __restrict__ vec, + const uint8_t* __restrict__ mat, + half* __restrict__ mul, + const half* __restrict__ scales, + const half* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int height, + int width +) { + //int weight_total = batch * heads * height * width; + int input_total = batch * heads * vec_row * height; + int out_total = batch * heads * vec_row * width; + int tid = threadIdx.x; + int h = BLOCKWIDTH * blockIdx.x; + int w = BLOCKWIDTH * blockIdx.y + tid; + if (w >= width && tid >= height) { + return; + } + + __shared__ float blockvec[BLOCKWIDTH]; + int k; + float w_tmp; + float weight[BLOCKWIDTH]; + + for (int b = 0; b < batch; ++b){ + for (int head = 0; head < heads; ++head){ + int batch_shift = b * heads + head; + for (k = 0; k < BLOCKWIDTH; ++k){ + int w_index = (batch_shift * height + h + k) * width + w; + float scale = __half2float(scales[batch_shift * height + h + k]); + float zero = __half2float(zeros[batch_shift * height + h + k]); + w_tmp = mat[w_index]; + weight[k] = scale * (w_tmp-zero); + } + + float res; + for (int vr = 0; vr < vec_row; ++vr){ + res = 0; + int vec_index = (batch_shift * vec_row + vr) * height + blockIdx.x * BLOCKWIDTH + tid; + if (vec_index < input_total) { + blockvec[tid] = __half2float(vec[vec_index]); + } else { + blockvec[tid] = 0; + } + __syncthreads(); + for (k = 0; k < BLOCKWIDTH; ++k){ + res += weight[k]*blockvec[k]; + } + int out_index = (batch_shift * vec_row + vr) * width + w; + if (out_index < out_total) { + atomicAdd(&mul[out_index], __float2half(res)); + } + __syncthreads(); + } + } + } +} + + + +void vecquant8matmul_batched_column_compression_old_cuda( + torch::Tensor vec, + torch::Tensor mat, + torch::Tensor mul, + torch::Tensor scales, + torch::Tensor zeros +) { + int batch = vec.size(0); + int heads = vec.size(1); + int vec_row = vec.size(2); + int height = vec.size(3); + int width = mat.size(3); + + dim3 blocks( + (height + BLOCKWIDTH - 1) / BLOCKWIDTH, + (width + BLOCKWIDTH - 1) / BLOCKWIDTH + ); + dim3 threads(BLOCKWIDTH); + + AT_DISPATCH_FLOATING_TYPES( + vec.type(), "vecquant8matmul_batched_column_compression_old_cuda", ([&] { + VecQuant8BatchMatMulColumnCompressionKernel_old<<>>( + vec.data(), mat.data(), mul.data(), + scales.data(), zeros.data(), + batch, heads, vec_row, height, width + ); + }) + ); + +} + +template +__global__ void VecQuant8BatchMatMulColumnCompressionKernel_old( + const scalar_t* __restrict__ vec, + const uint8_t* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const scalar_t* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int height, + int width +) { + int weight_total = batch * heads * height * width; + int input_total = batch * heads * vec_row * height; + int out_total = batch * heads * vec_row * width; + int tid = threadIdx.x; + // h is index of height with step being BLOCKWIDTH + int h = BLOCKWIDTH * blockIdx.x; + // w is index of width with step being 1 + int w = BLOCKWIDTH * blockIdx.y + tid; + if (w >= width && tid >= height) { + return; + } + + __shared__ scalar_t blockvec[BLOCKWIDTH]; + int k; + scalar_t w_tmp; + + float weight[BLOCKWIDTH]; + + for (int b = 0; b < batch; ++b){ + for (int head = 0; head < heads; ++head){ + int batch_shift = b * heads + head; + for (k = 0; k < BLOCKWIDTH && h + k < height; ++k){ + int w_index = (batch_shift * height + h + k) * width + w; + if (w_index >= weight_total || w >= width) { + weight[k] = 0; + } else { + scalar_t scale = scales[batch_shift * height + h + k]; + scalar_t zero = zeros[batch_shift * height + h + k]; + w_tmp = mat[w_index]; + weight[k] = scale * (w_tmp - zero); + } + } + + scalar_t res; + for (int vr = 0; vr < vec_row; ++vr){ + res = 0; + int vec_index = (batch_shift * vec_row + vr) * height + blockIdx.x * BLOCKWIDTH + tid; + if (vec_index < input_total) { + blockvec[tid] = vec[vec_index]; + } else { + blockvec[tid] = 0; + } + + __syncthreads(); + for (k = 0; k < BLOCKWIDTH && h + k < height; ++k){ + // res is the dot product of BLOCKWIDTH elements (part of width) + res += weight[k] * blockvec[k]; + } + // add res to the final result, final matrix shape: (batch, vec_row, width) + int out_index = (batch_shift * vec_row + vr) * width + w; + if (out_index < out_total) { + atomicAdd(&mul[out_index], res); + } + __syncthreads(); + } + } + } +} + + +void vecquant4matmul_batched_old_cuda( + torch::Tensor vec, + torch::Tensor mat, + torch::Tensor mul, + torch::Tensor scales, + torch::Tensor zeros +) { + int batch = vec.size(0); + int heads = vec.size(1); + int vec_row = vec.size(2); + int vec_height = vec.size(3); + int height = mat.size(2); + int width = mat.size(3); + int zero_width = zeros.size(2); + + dim3 blocks( + (height + BLOCKHEIGHT_OLD4 - 1) / BLOCKHEIGHT_OLD4, + (width + BLOCKWIDTH - 1) / BLOCKWIDTH + ); + dim3 threads(BLOCKWIDTH); + + AT_DISPATCH_FLOATING_TYPES( + vec.type(), "vecquant4matmul_batched_old_cuda", ([&] { + VecQuant4BatchMatMulKernel_old<<>>( + vec.data(), mat.data(), mul.data(), + scales.data(), zeros.data(), + batch, heads, vec_row, vec_height, height, width, zero_width + ); + }) + ); + +} + +template +__global__ void VecQuant4BatchMatMulKernel_old( + const scalar_t* __restrict__ vec, + const uint8_t* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const scalar_t* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int vec_height, + int height, + int width, + int zero_width +) { + int weight_total = batch * heads * height * width; + int input_total = batch * heads * vec_row * vec_height; + int out_total = batch * heads * vec_row * width; + int tid = threadIdx.x; + // h is index of height with step being BLOCKHEIGHT_OLD4 + int h = BLOCKHEIGHT_OLD4 * blockIdx.x; + // w is index of width with step being 1 + int w = BLOCKWIDTH * blockIdx.y + tid; + if (w >= width && tid >= vec_height) { + return; + } + + __shared__ scalar_t blockvec[BLOCKWIDTH]; + // i is index of mat of block first row + int i = width * h + w; + int k; + scalar_t w_tmp; + + float weight[BLOCKWIDTH]; + for (int b = 0; b < batch; ++b){ + for (int head = 0; head < heads; ++head){ + int batch_shift = b * heads + head; + for (k = 0; k < BLOCKWIDTH && h*2 + k < vec_height; ++k){ + int k_w = (k / 2); + int k_bit = (k % 2) * 4; + int w_index = batch_shift * height * width + i + (k_w * width); + if (w_index >= weight_total || w >= width) { + weight[k] = 0; + } else { + scalar_t scale = scales[batch_shift * width + w]; + scalar_t zero = zeros[batch_shift * width + w]; + w_tmp = ((as_unsigned(mat[w_index]) >> k_bit) & 0xF); + weight[k] = scale * (w_tmp - zero); + } + } + + scalar_t res; + for (int vr = 0; vr < vec_row; ++vr){ + res = 0; + int vec_index = (batch_shift * vec_row + vr) * vec_height + blockIdx.x * BLOCKWIDTH + tid; + if (vec_index < input_total) { + blockvec[tid] = vec[vec_index]; + } else { + blockvec[tid] = 0; + } + + __syncthreads(); + for (k = 0; k < BLOCKWIDTH && h*2 + k < vec_height; ++k){ + // res is the dot product of BLOCKWIDTH elements (part of width) + res += weight[k] * blockvec[k]; + } + // add res to the final result, final matrix shape: (batch, vec_row, width) + int out_index = (batch_shift * vec_row + vr) * width + w; + if (out_index < out_total) { + atomicAdd(&mul[out_index], res); + } + __syncthreads(); + } + } + } +} + + + + + +void vecquant4matmul_batched_column_compression_old_cuda( + torch::Tensor vec, + torch::Tensor mat, + torch::Tensor mul, + torch::Tensor scales, + torch::Tensor zeros +) { + int batch = vec.size(0); + int heads = vec.size(1); + int vec_row = vec.size(2); + int height = vec.size(3); + int width = mat.size(3); + + dim3 blocks( + (height + BLOCKHEIGHT_OLD4 - 1) / BLOCKHEIGHT_OLD4, + (width + BLOCKWIDTH - 1) / BLOCKWIDTH + ); + dim3 threads(BLOCKWIDTH); + + AT_DISPATCH_FLOATING_TYPES( + vec.type(), "vecquant4matmul_batched_column_compression_old_cuda", ([&] { + VecQuant4BatchMatMulColumnCompressionKernel_old<<>>( + vec.data(), mat.data(), mul.data(), + scales.data(), zeros.data(), + batch, heads, vec_row, height, width + ); + }) + ); + +} + +template +__global__ void VecQuant4BatchMatMulColumnCompressionKernel_old( + const scalar_t* __restrict__ vec, + const uint8_t* __restrict__ mat, + scalar_t* __restrict__ mul, + const scalar_t* __restrict__ scales, + const scalar_t* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int height, + int width +) { + int weight_total = batch * heads * height * width; + int input_total = batch * heads * vec_row * height; + int out_total = batch * heads * vec_row * width; + int tid = threadIdx.x; + // h is index of height with step being BLOCKWIDTH + int h = BLOCKHEIGHT_OLD4 * blockIdx.x; + // w is index of width with step being 1 + int w = BLOCKWIDTH * blockIdx.y + tid; + if (w >= width && tid >= height) { + return; + } + + __shared__ scalar_t blockvec[BLOCKWIDTH]; + int k; + scalar_t w_tmp; + + float weight[BLOCKWIDTH]; + + for (int b = 0; b < batch; ++b){ + for (int head = 0; head < heads; ++head){ + int batch_shift = b * heads + head; + for (k = 0; k < BLOCKWIDTH && h*2 + k < height; ++k){ + int k_w = (k / 2); + int k_bit = (k % 2) * 4; + int w_index = (batch_shift * height + h + k) * width + k_w; + if (w_index >= weight_total || w >= width) { + weight[k] = 0; + } else { + scalar_t scale = scales[batch_shift * height + h + k]; + scalar_t zero = zeros[batch_shift * height + h + k]; + w_tmp = ((as_unsigned(mat[w_index]) >> k_bit) & 0xF); + weight[k] = scale * (w_tmp - zero); + } + } + + scalar_t res; + for (int vr = 0; vr < vec_row; ++vr){ + res = 0; + int vec_index = (batch_shift * vec_row + vr) * height + blockIdx.x * BLOCKWIDTH + tid; + if (vec_index < input_total) { + blockvec[tid] = vec[vec_index]; + } else { + blockvec[tid] = 0; + } + + __syncthreads(); + for (k = 0; k < BLOCKWIDTH && h*2 + k < height; ++k){ + // res is the dot product of BLOCKWIDTH elements (part of width) + res += weight[k] * blockvec[k]; + } + // add res to the final result, final matrix shape: (batch, vec_row, width) + int out_index = (batch_shift * vec_row + vr) * width + w; + if (out_index < out_total) { + atomicAdd(&mul[out_index], res); + } + __syncthreads(); + } + } + } +} + + + + + +void vecquant8matmul_batched_faster_old_cuda( + torch::Tensor vec, + torch::Tensor mat, + torch::Tensor mul, + torch::Tensor scales, + torch::Tensor zeros +) { + int batch = vec.size(0); + int heads = vec.size(1); + int vec_row = vec.size(2); + int vec_height = vec.size(3); + int height = mat.size(2); + int width = mat.size(3); + + dim3 blocks( + (height + BLOCKWIDTH - 1) / BLOCKWIDTH, + (width + BLOCKWIDTH - 1) / BLOCKWIDTH + ); + dim3 threads(BLOCKWIDTH); + + VecQuant8BatchMatMulKernel_faster_old<<>>( + (half*) vec.data_ptr(), + (uint8_t*) mat.data_ptr(), + (half*) mul.data_ptr(), + (half*) scales.data_ptr(), + (half*) zeros.data_ptr(), + batch, heads, vec_row, vec_height, height, width + ); +} + + +__global__ void VecQuant8BatchMatMulKernel_faster_old( + const half* __restrict__ vec, + const uint8_t* __restrict__ mat, + half* __restrict__ mul, + const half* __restrict__ scales, + const half* __restrict__ zeros, + int batch, + int heads, + int vec_row, + int vec_height, + int height, + int width +) { + int weight_total = batch * heads * height * width; + int input_total = batch * heads * vec_row * vec_height; + int out_total = batch * heads * vec_row * width; + int tid = threadIdx.x; + const int BLOCKWIDTH_half = BLOCKWIDTH/2; + + int h = BLOCKWIDTH * blockIdx.x; //head_dim, dim=-1 + int w = BLOCKWIDTH * blockIdx.y + tid; //seq-len, +0-256 ,dim=-2 + /* + if (w >= width && tid >= vec_height) { + return; + } + */ + __shared__ half blockvec[BLOCKWIDTH]; //256 + int i = width * h + w; + int k; + + half w_tmp1 = __float2half(0); + half w_tmp2 = __float2half(0); + + half2 weight[BLOCKWIDTH_half]; + for (int b = 0; b < batch; ++b){ + for (int head = 0; head < heads; ++head){ + int batch_shift = b * heads + head; + //int zero_index = batch_shift; + for (k = 0; k < BLOCKWIDTH_half; ++k){ + int w_index1 = batch_shift * height * width + i + (2 * k * width); // [batch,head,h+k, w] + int w_index2 = batch_shift * height * width + i + ((2 * k + 1) * width); + int zero_index = batch_shift * width + w; // [batch,head, w] + if (w_index1 >= weight_total || w >= width || (2 * k + h) >= height) { + weight[k] = __float2half2_rn(0); + } else { + float zero_f=__half2float(zeros[zero_index]); + float scale_f= __half2float(scales[zero_index]); + if (w_index2 >= weight_total){ + w_tmp1 = __float2half((as_unsigned(mat[w_index1]) -zero_f)*scale_f); + w_tmp2 = __float2half(0); + weight[k] = __halves2half2(w_tmp1,w_tmp2); + //printf("zero_index is %d w is %d height is %d width is %d w_index1 is %d w_tmp1 is %f w_tmp2 is %f zero is %f scale is %f low is %f high is %f \n ",zero_index,w,height, width,w_index1,__half2float(w_tmp1),__half2float(w_tmp2),zero_f,scale_f,__low2float(weight[k]),__high2float(weight[k])); + }else{ + w_tmp1 = __int2half_rn(as_unsigned(mat[w_index1])); + w_tmp2 = __int2half_rn(as_unsigned(mat[w_index2])); + + //weight[k] = __hmul2(__hsub2(__halves2half2(w_tmp1,w_tmp2), __halves2half2(zero,zero)),__halves2half2(scale,scale)); + weight[k] = __hfma2(__halves2half2(w_tmp1,w_tmp2), __float2half2_rn(scale_f), __float2half2_rn(-(scale_f * zero_f))); + //printf("zero_index1 is %d zero_index2 is %d k is %d head is %d w is %d h is %d height is %d width is %d w_index1 is %d w_index2 is %d zero is %f scale is %f low is %f high is %f \n ",zero_index1,zero_index2,k,head,w,h,height, width,w_index1,w_index2,__half2float(zero1),__half2float(scale1),__low2float(weight[k]),__high2float(weight[k])); + } + } + } + + + for (int vr = 0; vr < vec_row; ++vr){ + float res=0; + int vec_index = (batch_shift * vec_row + vr) * height + blockIdx.x * BLOCKWIDTH + tid; + int out_index = (batch_shift * vec_row + vr) * width + w; + if (vec_index < input_total) { + //blockvec[tid] = __half2float(vec[vec_index]);// [batch, head, vr, tid(seq_len dim+)] + blockvec[tid] = vec[vec_index]; + //printf("width is %d height is %d h is %d w is %d vec_index is %d out_index is %d vec_row is %d vec_height is %d,vr is %d tid is %d blockvec is %f\n",width,height, h,w,vec_index,out_index,vec_row,vec_height,vr,tid,blockvec[tid]); + } else { + blockvec[tid] = __float2half(0); + } + __syncthreads(); + if (out_index < out_total) { + for (k = 0; k < BLOCKWIDTH_half; ++k){ + half2 res2 = __hmul2(weight[k],__halves2half2(blockvec[2*k],blockvec[2*k+1])); + res += __low2float(res2) + __high2float(res2); + } + atomicAdd(&mul[out_index], __float2half(res)); + } + __syncthreads(); + } + } + } +} + + +void vecquant8matmul_batched_column_compression_faster_old_cuda( + torch::Tensor vec, // [batch,heads, seq_q, seq_v] + torch::Tensor mat, // [batch,heads, seq_v, head_dim] + torch::Tensor mul, // [batch,heads, seq_q,head_dim] + torch::Tensor scales, // [batch,heads, head_dim] + torch::Tensor zeros +) { + int batch = vec.size(0); + int heads = vec.size(1); + int vec_row = vec.size(2); //ql + int height = mat.size(2); //vl + int width = mat.size(3); //head_dim + + dim3 blocks( + (height + BLOCKWIDTH - 1) / BLOCKWIDTH, + (width + BLOCKWIDTH - 1) / BLOCKWIDTH + ); + dim3 threads(BLOCKWIDTH); + + VecQuant8BatchMatMulColumnCompressionKernel_faster_old<<>>( + (half*) vec.data_ptr(), + (uint8_t*) mat.data_ptr(), + (half*) mul.data_ptr(), + (half*) scales.data_ptr(), + (half*) zeros.data_ptr(), + batch, heads, vec_row, height, width + ); + +} + + +__global__ void VecQuant8BatchMatMulColumnCompressionKernel_faster_old( + const half* __restrict__ vec, // [batch,heads, seq_q, seq_v] + const uint8_t* __restrict__ mat, // [batch,heads, seq_v, head_dim] + half* __restrict__ mul, // [batch,heads, seq_q,head_dim] + const half* __restrict__ scales, // [batch,heads, seq_v] + const half* __restrict__ zeros, + int batch, + int heads, + int vec_row, //seq_q + int height, //seq_v + int width //head_dim +) { + int weight_total = batch * heads * height * width; + int input_total = batch * heads * vec_row * height; + int out_total = batch * heads * vec_row * width; + int tid = threadIdx.x; + int h = BLOCKWIDTH * blockIdx.x; // vl + int w = BLOCKWIDTH * blockIdx.y + tid; //head_dim + block + if (w >= width && tid >= height) { + return; + } + __shared__ half blockvec[BLOCKWIDTH]; + int k; + half w_tmp1 = __float2half(0); + half w_tmp2 = __float2half(0); + int i = width * h + w; + const int BLOCKWIDTH_half = BLOCKWIDTH/2; + half2 weight[BLOCKWIDTH_half]; + + for (int b = 0; b < batch; ++b){ + for (int head = 0; head < heads; ++head){ + int batch_shift = b * heads + head; + //int zero_index = batch_shift; + for (k = 0; k < BLOCKWIDTH_half; ++k){ + int w_index1 = batch_shift * height * width + i + (2 * k) * width; // [batch,head, h+k, w] + int w_index2 = batch_shift * height * width + i + ((2 * k + 1) * width); + int zero_index1 = batch_shift * height + h + 2*k; // [batch,head, w] + int zero_index2 = batch_shift * height + h + 2*k+1; // [batch,head, w] + + if (w_index1 >= weight_total || (2 * k + h)>=height) { + weight[k]=__float2half2_rn(0); + } else{ + //int zero_index = batch_shift + h; // [batch,head, w] + //float scale_f1 = __half2float(scales[zero_index1]); + //float zero_f1 = __half2float(zeros[zero_index1]); + if (w_index2>=weight_total){ + w_tmp1 = __float2half((as_unsigned(mat[w_index1]) - __half2float(zeros[zero_index1]))* __half2float(scales[zero_index1])); + w_tmp2 = __float2half(0); + weight[k] = __halves2half2(w_tmp1,w_tmp2); + //printf("zero_index is %d k is %d w is %d head is %d height is %d width is %d w_index1 is %d w_tmp1 is %f w_tmp2 is %f zero is %f scale is %f low is %f high is %f \n ",zero_index,k,w,head,height, width,w_index1,__half2float(w_tmp1),__half2float(w_tmp2),zero_f,scale_f,__low2float(weight[k]),__high2float(weight[k])); + }else{ + w_tmp1 = __int2half_rn(as_unsigned(mat[w_index1])); + w_tmp2 = __int2half_rn(as_unsigned(mat[w_index2])); + half zero1=zeros[zero_index1]; + half zero2=zeros[zero_index2]; + half scale1=scales[zero_index1]; + half scale2=scales[zero_index2]; + weight[k] = __hmul2(__hsub2(__halves2half2(w_tmp1,w_tmp2), __halves2half2(zero1,zero2)),__halves2half2(scale1,scale2)); + //weight[k] = __hfma2(__halves2half2(w_tmp1,w_tmp2), __float2half2_rn(scale_f), __float2half2_rn(-(scale_f * zero_f))); + //printf("zero_index1 is %d zero_index2 is %d k is %d head is %d w is %d h is %d height is %d width is %d w_index1 is %d w_index2 is %d zero is %f scale is %f low is %f high is %f \n ",zero_index1,zero_index2,k,head,w,h,height, width,w_index1,w_index2,__half2float(zero1),__half2float(scale1),__low2float(weight[k]),__high2float(weight[k])); + } + } + } + + + for (int vr = 0; vr < vec_row; ++vr){ + float res=0; + int vec_index = (batch_shift * vec_row + vr) * height + blockIdx.x * BLOCKWIDTH + tid; + int out_index = (batch_shift * vec_row + vr) * width + w; + + if (vec_index < input_total) { + //blockvec[tid] = __half2float(vec[vec_index]); + blockvec[tid] = vec[vec_index]; + //printf("vec_index is %d out_index is %d vec_row is %d ,vr is %d tid is %d blockvec is %f\n",vec_index,out_index,vec_row,vr,tid,blockvec[tid]); + } else { + blockvec[tid] = __float2half(0); + //blockvec[tid] = 0; + } + __syncthreads(); + if (out_index < out_total) { + for (k = 0; k < BLOCKWIDTH_half; ++k){ + half2 res2 = __hmul2(weight[k],__halves2half2(blockvec[2*k],blockvec[2*k+1])); + res += __low2float(res2) + __high2float(res2); + } + atomicAdd(&mul[out_index], __float2half(res)); + } + __syncthreads(); + } + } + } +} + diff --git a/config.json b/config.json index 28b2b0397513a7faefd71c3f87ef562fade61cf7..5dea364f582714692cc99439c7d6848f4089343f 100644 --- a/config.json +++ b/config.json @@ -16,24 +16,24 @@ "intermediate_size": 49152, "kv_channels": 128, "layer_norm_epsilon": 1e-06, - "max_position_embeddings": 8192, + "max_position_embeddings": 32768, "model_type": "qwen", "no_bias": true, "num_attention_heads": 64, "num_hidden_layers": 80, "onnx_safe": null, - "padded_vocab_size": 152064, "rope_theta": 1000000, "rotary_emb_base": 1000000, "rotary_pct": 1.0, "scale_attn_weights": true, - "seq_length": 8192, + "seq_length": 32768, "tie_word_embeddings": false, - "tokenizer_type": "QWenTokenizer", + "tokenizer_class": "QWenTokenizer", "transformers_version": "4.32.0", "use_cache": true, "use_dynamic_ntk": false, "use_flash_attn": "auto", "use_logn_attn": false, "vocab_size": 152064 -} \ No newline at end of file +} + diff --git a/configuration_qwen.py b/configuration_qwen.py index f8fe2cb434cefda404c506d541959e2fefc86884..9817a71d3c7902e444a306461fae264df7477b0a 100644 --- a/configuration_qwen.py +++ b/configuration_qwen.py @@ -69,3 +69,4 @@ class QWenConfig(PretrainedConfig): tie_word_embeddings=tie_word_embeddings, **kwargs ) + diff --git a/cpp_kernels.py b/cpp_kernels.py index d9cee703ae23284f63078d8a15aca6d7c5614fdc..72397f746b7b842b907d2bb8190d1659ae6b4e07 100644 --- a/cpp_kernels.py +++ b/cpp_kernels.py @@ -53,3 +53,4 @@ extra_flags = [] cache_autogptq_cuda_256_sources = ["./cache_autogptq_cuda_256.cpp", "./cache_autogptq_cuda_kernel_256.cu"] cache_autogptq_cuda_256 = _cpp_extention_load_helper("cache_autogptq_cuda_256", cache_autogptq_cuda_256_sources, extra_flags) + diff --git a/generation_config.json b/generation_config.json index d4c90aec81cbb25e216774a03aeebb7a5508bb38..e24c6dab8a5046b94da3c2b178b7681b1be1b07c 100644 --- a/generation_config.json +++ b/generation_config.json @@ -11,6 +11,5 @@ ], "top_k": 0, "top_p": 0.8, - "transformers_version": "4.29.2", - "trust_remote_code": true + "transformers_version": "4.31.0" } diff --git a/model-00001-of-00019.safetensors b/model-00001-of-00019.safetensors deleted file mode 100644 index 1fa7317f910b9a05074b67b20f57aa1a77f04dcf..0000000000000000000000000000000000000000 --- a/model-00001-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ebe924a4cf034132106691f7be43f15245f8c94edd3807e6d2ca398c94ce55e6 -size 7726172928 diff --git a/model-00001-of-00082.safetensors b/model-00001-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..09599914c880105fe80b748f3fa91c2c814932e0 --- /dev/null +++ b/model-00001-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b78ee20c52beb8ba28d6374ffe6032f748d6b69dbe1f628c3f9c937ff06313c +size 2491416712 diff --git a/model-00002-of-00019.safetensors b/model-00002-of-00019.safetensors deleted file mode 100644 index 4d5c73d6e9cf39ad1fc4e5f950f706a1d90db481..0000000000000000000000000000000000000000 --- a/model-00002-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:991bafe46de40d30d58ac65f1c7604d075e074c06824dcb85830205cff443f99 -size 7919243200 diff --git a/model-00002-of-00082.safetensors b/model-00002-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..28319a770f9cf95de9babb9beb2dbc1e9d3ba5ca --- /dev/null +++ b/model-00002-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02bf377c7bcfc90e1b4008b656a082f9fcfeb1dc55987968c43d1d33e44eb8fa +size 1744929752 diff --git a/model-00003-of-00019.safetensors b/model-00003-of-00019.safetensors deleted file mode 100644 index af6df57e08dc7c7c2996dc81ba77ae3695924a5e..0000000000000000000000000000000000000000 --- a/model-00003-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:67f5b949bbd41b8a22a64ee27b0c9530d8786655b11d127f13f7aa56a22dfa18 -size 7784976112 diff --git a/model-00003-of-00082.safetensors b/model-00003-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2dc2e105da64c6f6ff2f7f580a29ebb94b85559c --- /dev/null +++ b/model-00003-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40907288644d9da2cf41520809cd603d553187cdc5d2eaf21812ba1af1628815 +size 1744913264 diff --git a/model-00004-of-00019.safetensors b/model-00004-of-00019.safetensors deleted file mode 100644 index 29da9efd25911230ade39fbaddc506df1a82a918..0000000000000000000000000000000000000000 --- a/model-00004-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:bd775ce9c425458df37e056663ad0a1a45859cf9012aacb6fe7200b98dfd8384 -size 7919243232 diff --git a/model-00004-of-00082.safetensors b/model-00004-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95ee279667aba00ca21bd2133b5642ba5cbcf8fc --- /dev/null +++ b/model-00004-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3e3f62cbe8f1d6ca15517a3dea22f0e449f4431dcd91d15c30ce0ed3b21be34 +size 1744913264 diff --git a/model-00005-of-00019.safetensors b/model-00005-of-00019.safetensors deleted file mode 100644 index 53318845123d5de3f526515a7a59c415d70e8568..0000000000000000000000000000000000000000 --- a/model-00005-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:258b03abf1c9406e112d2870bbfd2e03dcdbc5aa68872940267f6c11f7586987 -size 7784976144 diff --git a/model-00005-of-00082.safetensors b/model-00005-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2e045aee8634fa0bee0d5b0228a31f08fccb6ff4 --- /dev/null +++ b/model-00005-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a1c42a60428793fc54596b2858341d14214977c11e05a838f806e44a29c6c2b +size 1744913264 diff --git a/model-00006-of-00019.safetensors b/model-00006-of-00019.safetensors deleted file mode 100644 index 002da096ec4a7d5073bcb351e8a4dd4feabd799e..0000000000000000000000000000000000000000 --- a/model-00006-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cab322ac7b5b4be68b42112927661731d75e30ec7188d48d21e5bac72961946c -size 7919243232 diff --git a/model-00006-of-00082.safetensors b/model-00006-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d85e1d12c5ecc489366834e5cb8b5b04841a2027 --- /dev/null +++ b/model-00006-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c12658ed8b3bb368fd34082cba78cbabaf1bfaf4df105963f0d4475bfe15695 +size 1744913264 diff --git a/model-00007-of-00019.safetensors b/model-00007-of-00019.safetensors deleted file mode 100644 index 4f982e40f84db005209a365a992d04ab4127affc..0000000000000000000000000000000000000000 --- a/model-00007-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:65587e3949fcaa350f1b811e8e840978dc64ed39ab3ca0265a842dc3722bff03 -size 7784976144 diff --git a/model-00007-of-00082.safetensors b/model-00007-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08a3b8b6bac8f8cadba085396dec2110f1286486 --- /dev/null +++ b/model-00007-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:751273ae27cd581b2243962c8effdeefb728ff958ba3ec3d8850d700f65f66e3 +size 1744913264 diff --git a/model-00008-of-00019.safetensors b/model-00008-of-00019.safetensors deleted file mode 100644 index 53c4d26a187b144bab9eaf17fb987c0cef7198ed..0000000000000000000000000000000000000000 --- a/model-00008-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e1eb2c16da1f926dd495874774b0328b5646063a118113dd1fcbbeea6e924390 -size 7919243232 diff --git a/model-00008-of-00082.safetensors b/model-00008-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5f8eef4c902da33a6bbcfc84adc5f4311f5e73cd --- /dev/null +++ b/model-00008-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3505728188463041693baf15441339db200e4e74d036f7a7938b8f3198c9e2c1 +size 1744913264 diff --git a/model-00009-of-00019.safetensors b/model-00009-of-00019.safetensors deleted file mode 100644 index 995b358d7369dba3203c811f94b32a80d6fbd89e..0000000000000000000000000000000000000000 --- a/model-00009-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2bf8d5e061b590a4888a49512e7ed0fac09f20c944c63761416f81b53a895d1b -size 7784976144 diff --git a/model-00009-of-00082.safetensors b/model-00009-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92c3224288b120014fbe47de840b6849d7c3d0f1 --- /dev/null +++ b/model-00009-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9b548e1922b72c1a299420199922b64507b32d83dc034e5521a1e847f321d88 +size 1744913264 diff --git a/model-00010-of-00019.safetensors b/model-00010-of-00019.safetensors deleted file mode 100644 index 4369ce671d0094361efc6d6420fd5e4f732b0e3d..0000000000000000000000000000000000000000 --- a/model-00010-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:326b64c09d13e394e62752b39ff4eb9c7ea8f90db26b47f2b1fa842b3661ead6 -size 7919243232 diff --git a/model-00010-of-00082.safetensors b/model-00010-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..959c025493581318a58349f930a4a2c712074b28 --- /dev/null +++ b/model-00010-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:76f16336c29b5ab0ef1b22905dd03a1d31f270cf8ab601d886642a9e7ed474a7 +size 1744913264 diff --git a/model-00011-of-00019.safetensors b/model-00011-of-00019.safetensors deleted file mode 100644 index add67911f6e4f6b0a7e93bc611822887045cee7c..0000000000000000000000000000000000000000 --- a/model-00011-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:ce703106349e3bfe847945e8930bbf1407a736062cbe8c87de3d30c141618e1c -size 7784976144 diff --git a/model-00011-of-00082.safetensors b/model-00011-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7cf7709190c7074ded56989b1e8ffb5a7ad9920c --- /dev/null +++ b/model-00011-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d29b3a6cf8b3fab521a394e3d895c8c936d231880b02b8b617eda0779be244ba +size 1744913256 diff --git a/model-00012-of-00019.safetensors b/model-00012-of-00019.safetensors deleted file mode 100644 index 551a7ce550f6fff3649084cf138019d28d6c6206..0000000000000000000000000000000000000000 --- a/model-00012-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:17dbfa26cb0d50019d4a457a671d58815e25d210fdad706e0f4fb6643269acb1 -size 7919243232 diff --git a/model-00012-of-00082.safetensors b/model-00012-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6e71997eb055ce1c481afd4586ec91276bceac58 --- /dev/null +++ b/model-00012-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8045521fc95b1ecf0bff8de6e0e348060fcc680e1a93a2015ac39e1d040b9210 +size 1744913272 diff --git a/model-00013-of-00019.safetensors b/model-00013-of-00019.safetensors deleted file mode 100644 index 388c9c94d479976d63efbdbbbfd1270e8c303f8d..0000000000000000000000000000000000000000 --- a/model-00013-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:7975c4b8b0425797dfb035c39f381e3fdf05c433671a80486455c0fb286c675b -size 7784976144 diff --git a/model-00013-of-00082.safetensors b/model-00013-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..351750a168e91c18030f4081d61dcb53a5f60acc --- /dev/null +++ b/model-00013-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cb8615b845f6fc1e25fa9bae1cba90b28ef4770e60e7495e0eb40927523f5973 +size 1744913272 diff --git a/model-00014-of-00019.safetensors b/model-00014-of-00019.safetensors deleted file mode 100644 index 0cb7d7329a0e3e3ad1e9c9716051c3fc533e2ba2..0000000000000000000000000000000000000000 --- a/model-00014-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:df5b7d68c23252a0b5ca8aa6d572abad64f2ab87dffcd3d3d8828dd942286090 -size 7919243232 diff --git a/model-00014-of-00082.safetensors b/model-00014-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d70f9dffd3c70dad4c3dd6f152c66d40b695717f --- /dev/null +++ b/model-00014-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7c7ea4e7e2eee94f220df6be6e297eecf4fdac2920515ba3677b36b7447c19f +size 1744913272 diff --git a/model-00015-of-00019.safetensors b/model-00015-of-00019.safetensors deleted file mode 100644 index 22c287d561cbb8fcca677073613ea355c0b87fc8..0000000000000000000000000000000000000000 --- a/model-00015-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:8219ccd36a7d5f468b26f679e3874ac4e3f4bdf2a224766e06972eb70b3529da -size 7784976144 diff --git a/model-00015-of-00082.safetensors b/model-00015-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..29746a246ced452abcf8522b7944c3ce5e2d4854 --- /dev/null +++ b/model-00015-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c201c3ca71b2a33e4740be67c15bbe6142227a884303434b82104a6fbb4ab696 +size 1744913272 diff --git a/model-00016-of-00019.safetensors b/model-00016-of-00019.safetensors deleted file mode 100644 index 0a6842f454e945bfedfd584b95a790ef99d092ff..0000000000000000000000000000000000000000 --- a/model-00016-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f8a1c79a5df0051a079f5e13f3dee9156a77fbdf2063c5da91f577bc187304da -size 7919243232 diff --git a/model-00016-of-00082.safetensors b/model-00016-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea6c9c18764526724e55262569069e4f00199fb9 --- /dev/null +++ b/model-00016-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe85f55f16678c2b875854bd24e0fe40f239c6db511975c5afe704df9d666979 +size 1744913272 diff --git a/model-00017-of-00019.safetensors b/model-00017-of-00019.safetensors deleted file mode 100644 index 27f94ac3aaa2d0ff061057a99a36fb0140522606..0000000000000000000000000000000000000000 --- a/model-00017-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:f7c1a3fd1a8fe6905ba012dcbd69c6fb5cfde8257919eff12bef3b61e0414e2d -size 7784976144 diff --git a/model-00017-of-00082.safetensors b/model-00017-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0de48c683506e3659c229321168f1ce6cb009de9 --- /dev/null +++ b/model-00017-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e6575a40ace3de35b1e596f261c618de5bb7d6d33c929d7f2a6380882b972a7c +size 1744913272 diff --git a/model-00018-of-00019.safetensors b/model-00018-of-00019.safetensors deleted file mode 100644 index 7151af5a8bbfe0597eef968f0308f4ee2552272b..0000000000000000000000000000000000000000 --- a/model-00018-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:c0b24fd0c929e113faa73a5190af0132ca16b92358b510689ca7c11b1853af73 -size 7919243232 diff --git a/model-00018-of-00082.safetensors b/model-00018-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..90d3f9d64c56a003f9627ab8e063be753c0101a6 --- /dev/null +++ b/model-00018-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed694a7e792b79d63997d73ff81940dd22be41980c877e4cc1433fedf5e755e1 +size 1744913272 diff --git a/model-00019-of-00019.safetensors b/model-00019-of-00019.safetensors deleted file mode 100644 index aaaac160bd1fb9928f3d88defae885f99ab7b3af..0000000000000000000000000000000000000000 --- a/model-00019-of-00019.safetensors +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:a4898686a612d742a7d1d91b61647fa4b3cfd2d73508c0fdd17833121cef696c -size 3296739776 diff --git a/model-00019-of-00082.safetensors b/model-00019-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a558d2b7335cfb8f5f7cd8f6b17d787a74fd88f8 --- /dev/null +++ b/model-00019-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:256ccda665443b2c7c502fbb654923f71974f98557bd7c84f620eaff73f9ed85 +size 1744913272 diff --git a/model-00020-of-00082.safetensors b/model-00020-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f5fb6ffa7ab9b10ca04a52a27ddd6eedb8f857e4 --- /dev/null +++ b/model-00020-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:616bec5738703a2364f596c3366f9544e3d8a89c1eb9181bd3b7242af5be5619 +size 1744913272 diff --git a/model-00021-of-00082.safetensors b/model-00021-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19e987462111bea84034f4591b5bb0fe138fcda0 --- /dev/null +++ b/model-00021-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b1602b5003fceb753025cd16c87288b65416524c8b8668fe4b0c920820c67cca +size 1744913272 diff --git a/model-00022-of-00082.safetensors b/model-00022-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0ac2e91dde9c234c2fb20b5cb3b4f9b642fbc0a2 --- /dev/null +++ b/model-00022-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75b7178fd929c3fd4ccd4168db338ab199c6b7ddc896fddb83d83c7ccded4f54 +size 1744913272 diff --git a/model-00023-of-00082.safetensors b/model-00023-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ec94071ffecebddbb2a02ca8c0baee7424db018f --- /dev/null +++ b/model-00023-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b9cf968f7a6df52eddb5302047b7c4ab5eb74d980467a70b6b9f7cdb987cd570 +size 1744913272 diff --git a/model-00024-of-00082.safetensors b/model-00024-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3c2991a13733da0491c702d4db84439e630e8e62 --- /dev/null +++ b/model-00024-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b032889a50034c59792c7cdd17192684bb643890536bd7393807e9652e25c686 +size 1744913272 diff --git a/model-00025-of-00082.safetensors b/model-00025-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ef0fb3f8691521636260629a683731a19b03533c --- /dev/null +++ b/model-00025-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5520809074d934cb07215125db6e95cb139c729e54c4417f4f349df913d1b75e +size 1744913272 diff --git a/model-00026-of-00082.safetensors b/model-00026-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cfd95c1bf4c4b1ffc132ece7bb6fda0ab1bce9f2 --- /dev/null +++ b/model-00026-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dbeb8d2f8b8450d588aaf2d28f1b1ef02563e430ed84ab71bc7cb982fa969100 +size 1744913272 diff --git a/model-00027-of-00082.safetensors b/model-00027-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fbe31d23601e7282adaf94b75114b50eeddcad7d --- /dev/null +++ b/model-00027-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf91b60f20eb6db66d714d8392a153665fe96c225b9dbbb9aa1482cf8dcd1a82 +size 1744913272 diff --git a/model-00028-of-00082.safetensors b/model-00028-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bbc2e9a30d2544c2ceba6b548a718c2fb9994ad5 --- /dev/null +++ b/model-00028-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2701e032245071c17206586132bd904469404566bddd5310b1402e98c13eb53d +size 1744913272 diff --git a/model-00029-of-00082.safetensors b/model-00029-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..607f690604d0c9097c97cdbcd75048ee774f8fe8 --- /dev/null +++ b/model-00029-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ca6761b06d113acf9ce64f2cb17215be4d789540cd80fcaf1ed911f49cebe8d +size 1744913272 diff --git a/model-00030-of-00082.safetensors b/model-00030-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ffdb56d503ae30341c7f08ae3ce3f5ee37fd8793 --- /dev/null +++ b/model-00030-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74d6558da8470030d4b9cea9fd415a8d72779588102124324167b17f6e41fe06 +size 1744913272 diff --git a/model-00031-of-00082.safetensors b/model-00031-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..902908f35b67dffcafb8e82f09dea0383ebc9bd2 --- /dev/null +++ b/model-00031-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4e5a202e80c62aebf6027cffc889b160a41d5161e9f1d571d5d17c4a0d52e9a +size 1744913272 diff --git a/model-00032-of-00082.safetensors b/model-00032-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..46e2582442c77da44b17ac4f89dd11ac8b14a185 --- /dev/null +++ b/model-00032-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa46299fc82fa0aa301979751dc9667b75968d4350ba3b371db534c560500e9c +size 1744913272 diff --git a/model-00033-of-00082.safetensors b/model-00033-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e8c61232b302c0f64ea729d1b1774a36b43b7e54 --- /dev/null +++ b/model-00033-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:580d5f5653cfe66f636d4c3bd4751c164f635c7eb91069098c2533e6cadfc4d1 +size 1744913272 diff --git a/model-00034-of-00082.safetensors b/model-00034-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fb0e3805883ca4e6bae3e4d1514b952ef5de3e3d --- /dev/null +++ b/model-00034-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:653397358e07cf9940234633469befc4d18c237348829a8c3266c37b717a1a65 +size 1744913272 diff --git a/model-00035-of-00082.safetensors b/model-00035-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d33245aee909dbabccc815dc6149d0df02c00646 --- /dev/null +++ b/model-00035-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6478a69f28f7b0b814cb1f507f9cfe44d2a5aa8e9528e44804bfdf656a63e866 +size 1744913272 diff --git a/model-00036-of-00082.safetensors b/model-00036-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ac4cc41c9d73e5fd9672081c0b9ca1f5765471f --- /dev/null +++ b/model-00036-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5cbbc3ca8492289e9161a82f5680c7380def9da21814e54517ba2783dfe7041 +size 1744913272 diff --git a/model-00037-of-00082.safetensors b/model-00037-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..370007f4aa028e096c62dbdf712774d102bf50c8 --- /dev/null +++ b/model-00037-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8c67bab4c5bbc56c298573f72ed7bf793c2c1cd2d19e7b4ed088cafec28d1533 +size 1744913272 diff --git a/model-00038-of-00082.safetensors b/model-00038-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cc9ddc54a7e739da670c0961e2b4cb00ccf9514a --- /dev/null +++ b/model-00038-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d09632827abefa3521a7f97f589ea854b807366dbd55b0838d448349b03c38d2 +size 1744913272 diff --git a/model-00039-of-00082.safetensors b/model-00039-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7b171a84061ea66e98d86918835f237a7471bdf --- /dev/null +++ b/model-00039-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da488f61fd7dd19b9c8c0e48f670ecef38b3b2329fca7fabe83dde8165524932 +size 1744913272 diff --git a/model-00040-of-00082.safetensors b/model-00040-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d76bf153dd3ac9b1a76051ea0c2d155ea33048f7 --- /dev/null +++ b/model-00040-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e8cf629905e20e50ea75b73ea0ec38f82390b21374a7ec5c8c76837a333fadf +size 1744913272 diff --git a/model-00041-of-00082.safetensors b/model-00041-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..de182590a8da805ed8d6b621dcb9aebefdee14bd --- /dev/null +++ b/model-00041-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e99156529a9c44026dc7648f0a057e68fbbb78c857d5b829a7df7c9fe64b085 +size 1744913272 diff --git a/model-00042-of-00082.safetensors b/model-00042-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5e52c9d4686ccd6cd79892838526087aa8bfd47a --- /dev/null +++ b/model-00042-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e69d18f3969ffd7f070f2380899f9244519badc3b426543c20a25dc96f1beadd +size 1744913272 diff --git a/model-00043-of-00082.safetensors b/model-00043-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9656bac2f047a994acc0e75789f9486501d2f94d --- /dev/null +++ b/model-00043-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27545bd2394a4840cbbaad02b78f89fbe2dd4d6a8817f90d8637e728a07d0758 +size 1744913272 diff --git a/model-00044-of-00082.safetensors b/model-00044-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08f3fea643b4e2abc6ab0390396a1b1ce7f7754d --- /dev/null +++ b/model-00044-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8a6ea449c4f847ae081a5b95bb54a9a516291e67e296080bce452013d0ca1d6 +size 1744913272 diff --git a/model-00045-of-00082.safetensors b/model-00045-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f512732f9322b3d5796a5da6c52a0b3c7b66c4c9 --- /dev/null +++ b/model-00045-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c922adbc48ea9a50be3d5557cae0b0e7b689d247c3143e94e2edd246dac47cd6 +size 1744913272 diff --git a/model-00046-of-00082.safetensors b/model-00046-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b733ea2b3883f2cc46f99aaaccd331d68b1dca5 --- /dev/null +++ b/model-00046-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b5b73a46e88fd8aaa957cc7088648c433e2366d6e473b1f909532421f40255 +size 1744913272 diff --git a/model-00047-of-00082.safetensors b/model-00047-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d5c8bde17ee33ea80b867ecf9b7754cd6962d19 --- /dev/null +++ b/model-00047-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9ec013f161cad543adb3790b91ac6ae5dfe327ba528f93810b6e2401ad493c0 +size 1744913272 diff --git a/model-00048-of-00082.safetensors b/model-00048-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..15147d9c5f2aa21efd4066f29d55bd5a25f1a87d --- /dev/null +++ b/model-00048-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:257732341f0cde311257e2b8b6b7514f3925caa4ef863fdb9d769c4d05ffdad5 +size 1744913272 diff --git a/model-00049-of-00082.safetensors b/model-00049-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08aef472a6803e4e0be25db5b36cdc5124258773 --- /dev/null +++ b/model-00049-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84511cd0b010be3250c41d803997f99e9128176eb74d5ea8fb1ea33ee56d5473 +size 1744913272 diff --git a/model-00050-of-00082.safetensors b/model-00050-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fcf1599f3efde9adc3f4d264c6b475c3bff47ee6 --- /dev/null +++ b/model-00050-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8514300ad2d710135922946ed02fba91967bf1108816be671c0021ed1b3648ee +size 1744913272 diff --git a/model-00051-of-00082.safetensors b/model-00051-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d0ae3ad13ddbd9facf4fc52ae5480e8dbcd4f56e --- /dev/null +++ b/model-00051-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb5f7fd32aab9f60605142248e7ca91f2b050f1314e212ef6fd92cc65ce93208 +size 1744913272 diff --git a/model-00052-of-00082.safetensors b/model-00052-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..20bdf07fc059addcfa93e38b202ea748712b0951 --- /dev/null +++ b/model-00052-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd5bed2581ced0d4a7d01c9217764ceb22c8bbcf8cbb3c25290f86137a100004 +size 1744913272 diff --git a/model-00053-of-00082.safetensors b/model-00053-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..209347213e2ce5f1c2ce4dd28d605cf91a6bac08 --- /dev/null +++ b/model-00053-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9268b11e441649d0e7b8993878cc6f7e8a5232ebe0d4dc53e398e9a646a4c046 +size 1744913272 diff --git a/model-00054-of-00082.safetensors b/model-00054-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..19942566e35b76f686603c00f50eb9f392a11623 --- /dev/null +++ b/model-00054-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c382c1d86a276601be08f3764774296269ea78ea910cdfee7ed3baed88c8578d +size 1744913272 diff --git a/model-00055-of-00082.safetensors b/model-00055-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0553a1c1c5349b21c37c2da6f58e2cd8013946ee --- /dev/null +++ b/model-00055-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f180fe2b8c9d7e51411634eadc3906e065801a576618c043ca2152c56cdfee2d +size 1744913272 diff --git a/model-00056-of-00082.safetensors b/model-00056-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..97ea39cbc5b99a28e233d6ca8416397c53828833 --- /dev/null +++ b/model-00056-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:490e4d0999c96790b6978bef2ee2a68367e8ab2d4924302c051798070e1ef2ce +size 1744913272 diff --git a/model-00057-of-00082.safetensors b/model-00057-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..061bdf192a74919a8c08a0f46b45f6d8aa3c7ea5 --- /dev/null +++ b/model-00057-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a09ab8fabfd7b69ad5e3dc42c194fe347f6c44ea5101aac303189e554a1d8835 +size 1744913272 diff --git a/model-00058-of-00082.safetensors b/model-00058-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cb7ae95ec4a9a8cade68f3ffa8b2fb3e04fe6f5f --- /dev/null +++ b/model-00058-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f5ce545162620d5c1a4c1797208488e1fc2b147d5bc78610b7b1e005f5d11c8 +size 1744913272 diff --git a/model-00059-of-00082.safetensors b/model-00059-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4e4689568b20196db139eb3613b500dc20b466ef --- /dev/null +++ b/model-00059-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f492c0ece179b4959b21a90469ca00e32e7b1a3a2563ba882e8229ce07893aa1 +size 1744913272 diff --git a/model-00060-of-00082.safetensors b/model-00060-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc37e6ad859ebc13ad49b88810b19dcfcdc0472c --- /dev/null +++ b/model-00060-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c93fca9a5c5915ecaf07daf6f8dd1fc41242d06f80c49a63138f7df83888a0d +size 1744913272 diff --git a/model-00061-of-00082.safetensors b/model-00061-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fc4a4361930726e3ba5fb943f547dc586cf5d014 --- /dev/null +++ b/model-00061-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3394edf769463f0aaaf7c3c01a1db900049f9f9da7da71a04775e31da9033d44 +size 1744913272 diff --git a/model-00062-of-00082.safetensors b/model-00062-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cbd01da4665032fb1ff2c92d566236643f36c50e --- /dev/null +++ b/model-00062-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96bfea9d8b116245b7cfc6dc4aa228de94a9743064f9a04d269725cfeae1bedb +size 1744913272 diff --git a/model-00063-of-00082.safetensors b/model-00063-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4ae00fa6ad1d66037cadb9e5a7769982b9231a2a --- /dev/null +++ b/model-00063-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd940aa43cd0709b711496f85558dfdffa7158a03ab2a281acbde7f7fc5b724f +size 1744913272 diff --git a/model-00064-of-00082.safetensors b/model-00064-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0269db4cffdbed1ad47f57cbf3fcd6bcd02f69ad --- /dev/null +++ b/model-00064-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6943a04428413ced8979bd251b8bd1708cd107c9e25fbccae9a5fa13c2244038 +size 1744913272 diff --git a/model-00065-of-00082.safetensors b/model-00065-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6bf563c5023be01d00263638b313057d4776059b --- /dev/null +++ b/model-00065-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2f1aeaf5a1ff3026d5eb012a67d53fe6b762ae7499f482ebc4a2555456202c40 +size 1744913272 diff --git a/model-00066-of-00082.safetensors b/model-00066-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a1d9b48d02e38052deebc81c838747f37b2aaa72 --- /dev/null +++ b/model-00066-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:306983ac6a6cb6a4c1e41f98874c2047e41e50a1d53a1e09692546fd5eb6cceb +size 1744913272 diff --git a/model-00067-of-00082.safetensors b/model-00067-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a8d9d1ee0f5ba91d5fbfa6413376ddab237ca08a --- /dev/null +++ b/model-00067-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51c7c0b89210d41ddd55fb133a3ce288750cb15ea021405bcc3a207df3dfefe0 +size 1744913272 diff --git a/model-00068-of-00082.safetensors b/model-00068-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..24d12b778bce92bdd6ec7c28af5674de6e47faa8 --- /dev/null +++ b/model-00068-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e1e6353ecbadd4b0442008abf8a41b2ccdb614543cb0a77b1e3f75b7e7874d3 +size 1744913272 diff --git a/model-00069-of-00082.safetensors b/model-00069-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..274c389b6a1e351f1f1e2abba1ce6b46cd8f737e --- /dev/null +++ b/model-00069-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b63cc7734dd8b2159001c4974f9b8d7350f64cae77509b1be1dc50dc465fbab9 +size 1744913272 diff --git a/model-00070-of-00082.safetensors b/model-00070-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b8ecc3bc0044fad226f53c28e36fec63daa70791 --- /dev/null +++ b/model-00070-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6229a186fff359007d4f96ded8348d60de7f04dabd70b67d3cfb17332709fcf9 +size 1744913272 diff --git a/model-00071-of-00082.safetensors b/model-00071-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9fd4042ca5426643008ce197cf6322e83e5fde3 --- /dev/null +++ b/model-00071-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7594dabca8333d2b01e4f2ad95dbca6888fb67e997994a46036c04437abaa490 +size 1744913272 diff --git a/model-00072-of-00082.safetensors b/model-00072-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..10a2e1f26577005e158e8368d9e91c7af1e4ca14 --- /dev/null +++ b/model-00072-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bdbd04c20bcb1d2dae065adcc35ff550126a373088b190a7275180e636707b1 +size 1744913272 diff --git a/model-00073-of-00082.safetensors b/model-00073-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a657fd6ad151365652b33bcb787947795a9dac01 --- /dev/null +++ b/model-00073-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f706fc06ca961512fcb0e1599a4aa9a00a65de7acc97afb71086d27eb1889191 +size 1744913272 diff --git a/model-00074-of-00082.safetensors b/model-00074-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e31caa51bed7317a7aad2e6a55eb7bf36ef67607 --- /dev/null +++ b/model-00074-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bb4a9e046e352475544ead4e5ddb450d92c9fb0d7759bb8d8fb263f9a7d2a77 +size 1744913272 diff --git a/model-00075-of-00082.safetensors b/model-00075-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c3c8c8e80e86144789e397001892bfe716ed0d50 --- /dev/null +++ b/model-00075-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0604a722ff70feca57859b8c9a6c3717926a29515ec47dd5faf9c157332c052 +size 1744913272 diff --git a/model-00076-of-00082.safetensors b/model-00076-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0b3c137ecedfb05d52f41763138028eb3e55469 --- /dev/null +++ b/model-00076-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f86a992864e78dcaf6d88768f67d02168f6b52dff0f05dfa9fbc2b9906760a4e +size 1744913272 diff --git a/model-00077-of-00082.safetensors b/model-00077-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4522535ffd56cc80839f01002793c5556692a18f --- /dev/null +++ b/model-00077-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09ec33e2865640ff8e0682ae80d65f5c7d7852c8755a6c75e65882e3f57c3878 +size 1744913272 diff --git a/model-00078-of-00082.safetensors b/model-00078-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f9e860166a87b68aed975675a0f07d7884fc7996 --- /dev/null +++ b/model-00078-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64e711d81b48b224ba87760fb764fc38036a88c46fa839062467a533ec225565 +size 1744913272 diff --git a/model-00079-of-00082.safetensors b/model-00079-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..512cb631c3ffc2bb36de28a18c61c78b8033f3c2 --- /dev/null +++ b/model-00079-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2abda654f103c62b38882360f657d6e3de4c0a22a6e34fc60992bf4802324f3 +size 1744913272 diff --git a/model-00080-of-00082.safetensors b/model-00080-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..16332c9073ec2b2fb15f9c605ffacd9d5268c36a --- /dev/null +++ b/model-00080-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:514fef25c73432557e430744b94832a4a539b7708b8aa27304bd7ed8cf518809 +size 1744913272 diff --git a/model-00081-of-00082.safetensors b/model-00081-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1864575f886bb5fc3f8614d89188c311e1f8b584 --- /dev/null +++ b/model-00081-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d320e425bbf02d1e2ebf14e0f582cbbe6e3c6b4838f4497c9a63ef5ae9e5d148 +size 1744913272 diff --git a/model-00082-of-00082.safetensors b/model-00082-of-00082.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0dff52e7c468b5e238437f915b708c85defedc3c --- /dev/null +++ b/model-00082-of-00082.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbf6116d9c62a64cf34dfc60ac848dcbe243cf0d3fdc40f63d18ad1898f497ab +size 2491416704 diff --git a/model.safetensors.index.json b/model.safetensors.index.json index f99b84d056f982bbdf30383636fd93b18ab39829..2eb1a8dcfd8d01a926a0c63bcad3793e8ff490f8 100644 --- a/model.safetensors.index.json +++ b/model.safetensors.index.json @@ -3,648 +3,648 @@ "total_size": 144575840256 }, "weight_map": { - "lm_head.weight": "model-00019-of-00019.safetensors", - "transformer.h.0.attn.c_attn.bias": "model-00001-of-00019.safetensors", - "transformer.h.0.attn.c_attn.weight": "model-00001-of-00019.safetensors", - "transformer.h.0.attn.c_proj.weight": "model-00001-of-00019.safetensors", - "transformer.h.0.ln_1.weight": "model-00001-of-00019.safetensors", - "transformer.h.0.ln_2.weight": "model-00001-of-00019.safetensors", - "transformer.h.0.mlp.c_proj.weight": "model-00001-of-00019.safetensors", - "transformer.h.0.mlp.w1.weight": "model-00001-of-00019.safetensors", - "transformer.h.0.mlp.w2.weight": "model-00001-of-00019.safetensors", - "transformer.h.1.attn.c_attn.bias": "model-00001-of-00019.safetensors", - "transformer.h.1.attn.c_attn.weight": "model-00001-of-00019.safetensors", - "transformer.h.1.attn.c_proj.weight": "model-00001-of-00019.safetensors", - "transformer.h.1.ln_1.weight": "model-00001-of-00019.safetensors", - "transformer.h.1.ln_2.weight": "model-00001-of-00019.safetensors", - "transformer.h.1.mlp.c_proj.weight": "model-00001-of-00019.safetensors", - "transformer.h.1.mlp.w1.weight": "model-00001-of-00019.safetensors", - "transformer.h.1.mlp.w2.weight": "model-00001-of-00019.safetensors", - "transformer.h.10.attn.c_attn.bias": "model-00003-of-00019.safetensors", - "transformer.h.10.attn.c_attn.weight": "model-00003-of-00019.safetensors", - "transformer.h.10.attn.c_proj.weight": "model-00003-of-00019.safetensors", - "transformer.h.10.ln_1.weight": "model-00003-of-00019.safetensors", - "transformer.h.10.ln_2.weight": "model-00003-of-00019.safetensors", - "transformer.h.10.mlp.c_proj.weight": "model-00003-of-00019.safetensors", - "transformer.h.10.mlp.w1.weight": "model-00003-of-00019.safetensors", - "transformer.h.10.mlp.w2.weight": "model-00003-of-00019.safetensors", - "transformer.h.11.attn.c_attn.bias": "model-00003-of-00019.safetensors", - "transformer.h.11.attn.c_attn.weight": "model-00003-of-00019.safetensors", - "transformer.h.11.attn.c_proj.weight": "model-00003-of-00019.safetensors", - "transformer.h.11.ln_1.weight": "model-00003-of-00019.safetensors", - "transformer.h.11.ln_2.weight": "model-00003-of-00019.safetensors", - "transformer.h.11.mlp.c_proj.weight": "model-00003-of-00019.safetensors", - "transformer.h.11.mlp.w1.weight": "model-00003-of-00019.safetensors", - "transformer.h.11.mlp.w2.weight": "model-00003-of-00019.safetensors", - "transformer.h.12.attn.c_attn.bias": "model-00004-of-00019.safetensors", - "transformer.h.12.attn.c_attn.weight": "model-00004-of-00019.safetensors", - "transformer.h.12.attn.c_proj.weight": "model-00004-of-00019.safetensors", - "transformer.h.12.ln_1.weight": "model-00003-of-00019.safetensors", - "transformer.h.12.ln_2.weight": "model-00004-of-00019.safetensors", - "transformer.h.12.mlp.c_proj.weight": "model-00004-of-00019.safetensors", - "transformer.h.12.mlp.w1.weight": "model-00004-of-00019.safetensors", - "transformer.h.12.mlp.w2.weight": "model-00004-of-00019.safetensors", - "transformer.h.13.attn.c_attn.bias": "model-00004-of-00019.safetensors", - "transformer.h.13.attn.c_attn.weight": "model-00004-of-00019.safetensors", - "transformer.h.13.attn.c_proj.weight": "model-00004-of-00019.safetensors", - "transformer.h.13.ln_1.weight": "model-00004-of-00019.safetensors", - "transformer.h.13.ln_2.weight": "model-00004-of-00019.safetensors", - "transformer.h.13.mlp.c_proj.weight": "model-00004-of-00019.safetensors", - "transformer.h.13.mlp.w1.weight": "model-00004-of-00019.safetensors", - "transformer.h.13.mlp.w2.weight": "model-00004-of-00019.safetensors", - "transformer.h.14.attn.c_attn.bias": "model-00004-of-00019.safetensors", - "transformer.h.14.attn.c_attn.weight": "model-00004-of-00019.safetensors", - "transformer.h.14.attn.c_proj.weight": "model-00004-of-00019.safetensors", - "transformer.h.14.ln_1.weight": "model-00004-of-00019.safetensors", - "transformer.h.14.ln_2.weight": "model-00004-of-00019.safetensors", - "transformer.h.14.mlp.c_proj.weight": "model-00004-of-00019.safetensors", - "transformer.h.14.mlp.w1.weight": "model-00004-of-00019.safetensors", - "transformer.h.14.mlp.w2.weight": "model-00004-of-00019.safetensors", - "transformer.h.15.attn.c_attn.bias": "model-00004-of-00019.safetensors", - "transformer.h.15.attn.c_attn.weight": "model-00004-of-00019.safetensors", - "transformer.h.15.attn.c_proj.weight": "model-00004-of-00019.safetensors", - "transformer.h.15.ln_1.weight": "model-00004-of-00019.safetensors", - "transformer.h.15.ln_2.weight": "model-00004-of-00019.safetensors", - "transformer.h.15.mlp.c_proj.weight": "model-00004-of-00019.safetensors", - "transformer.h.15.mlp.w1.weight": "model-00004-of-00019.safetensors", - "transformer.h.15.mlp.w2.weight": "model-00004-of-00019.safetensors", - "transformer.h.16.attn.c_attn.bias": "model-00004-of-00019.safetensors", - "transformer.h.16.attn.c_attn.weight": "model-00004-of-00019.safetensors", - "transformer.h.16.attn.c_proj.weight": "model-00004-of-00019.safetensors", - "transformer.h.16.ln_1.weight": "model-00004-of-00019.safetensors", - "transformer.h.16.ln_2.weight": "model-00004-of-00019.safetensors", - "transformer.h.16.mlp.c_proj.weight": "model-00005-of-00019.safetensors", - "transformer.h.16.mlp.w1.weight": "model-00004-of-00019.safetensors", - "transformer.h.16.mlp.w2.weight": "model-00005-of-00019.safetensors", - "transformer.h.17.attn.c_attn.bias": "model-00005-of-00019.safetensors", - "transformer.h.17.attn.c_attn.weight": "model-00005-of-00019.safetensors", - "transformer.h.17.attn.c_proj.weight": "model-00005-of-00019.safetensors", - "transformer.h.17.ln_1.weight": "model-00005-of-00019.safetensors", - "transformer.h.17.ln_2.weight": "model-00005-of-00019.safetensors", - "transformer.h.17.mlp.c_proj.weight": "model-00005-of-00019.safetensors", - "transformer.h.17.mlp.w1.weight": "model-00005-of-00019.safetensors", - "transformer.h.17.mlp.w2.weight": "model-00005-of-00019.safetensors", - "transformer.h.18.attn.c_attn.bias": "model-00005-of-00019.safetensors", - "transformer.h.18.attn.c_attn.weight": "model-00005-of-00019.safetensors", - "transformer.h.18.attn.c_proj.weight": "model-00005-of-00019.safetensors", - "transformer.h.18.ln_1.weight": "model-00005-of-00019.safetensors", - "transformer.h.18.ln_2.weight": "model-00005-of-00019.safetensors", - "transformer.h.18.mlp.c_proj.weight": "model-00005-of-00019.safetensors", - "transformer.h.18.mlp.w1.weight": "model-00005-of-00019.safetensors", - "transformer.h.18.mlp.w2.weight": "model-00005-of-00019.safetensors", - "transformer.h.19.attn.c_attn.bias": "model-00005-of-00019.safetensors", - "transformer.h.19.attn.c_attn.weight": "model-00005-of-00019.safetensors", - "transformer.h.19.attn.c_proj.weight": "model-00005-of-00019.safetensors", - "transformer.h.19.ln_1.weight": "model-00005-of-00019.safetensors", - "transformer.h.19.ln_2.weight": "model-00005-of-00019.safetensors", - "transformer.h.19.mlp.c_proj.weight": "model-00005-of-00019.safetensors", - "transformer.h.19.mlp.w1.weight": "model-00005-of-00019.safetensors", - "transformer.h.19.mlp.w2.weight": "model-00005-of-00019.safetensors", - "transformer.h.2.attn.c_attn.bias": "model-00001-of-00019.safetensors", - "transformer.h.2.attn.c_attn.weight": "model-00001-of-00019.safetensors", - "transformer.h.2.attn.c_proj.weight": "model-00001-of-00019.safetensors", - "transformer.h.2.ln_1.weight": "model-00001-of-00019.safetensors", - "transformer.h.2.ln_2.weight": "model-00001-of-00019.safetensors", - "transformer.h.2.mlp.c_proj.weight": "model-00001-of-00019.safetensors", - "transformer.h.2.mlp.w1.weight": "model-00001-of-00019.safetensors", - "transformer.h.2.mlp.w2.weight": "model-00001-of-00019.safetensors", - "transformer.h.20.attn.c_attn.bias": "model-00005-of-00019.safetensors", - "transformer.h.20.attn.c_attn.weight": "model-00005-of-00019.safetensors", - "transformer.h.20.attn.c_proj.weight": "model-00005-of-00019.safetensors", - "transformer.h.20.ln_1.weight": "model-00005-of-00019.safetensors", - "transformer.h.20.ln_2.weight": "model-00005-of-00019.safetensors", - "transformer.h.20.mlp.c_proj.weight": "model-00005-of-00019.safetensors", - "transformer.h.20.mlp.w1.weight": "model-00005-of-00019.safetensors", - "transformer.h.20.mlp.w2.weight": "model-00005-of-00019.safetensors", - "transformer.h.21.attn.c_attn.bias": "model-00006-of-00019.safetensors", - "transformer.h.21.attn.c_attn.weight": "model-00006-of-00019.safetensors", - "transformer.h.21.attn.c_proj.weight": "model-00006-of-00019.safetensors", - "transformer.h.21.ln_1.weight": "model-00005-of-00019.safetensors", - "transformer.h.21.ln_2.weight": "model-00006-of-00019.safetensors", - "transformer.h.21.mlp.c_proj.weight": "model-00006-of-00019.safetensors", - "transformer.h.21.mlp.w1.weight": "model-00006-of-00019.safetensors", - "transformer.h.21.mlp.w2.weight": "model-00006-of-00019.safetensors", - "transformer.h.22.attn.c_attn.bias": "model-00006-of-00019.safetensors", - "transformer.h.22.attn.c_attn.weight": "model-00006-of-00019.safetensors", - "transformer.h.22.attn.c_proj.weight": "model-00006-of-00019.safetensors", - "transformer.h.22.ln_1.weight": "model-00006-of-00019.safetensors", - "transformer.h.22.ln_2.weight": "model-00006-of-00019.safetensors", - "transformer.h.22.mlp.c_proj.weight": "model-00006-of-00019.safetensors", - "transformer.h.22.mlp.w1.weight": "model-00006-of-00019.safetensors", - "transformer.h.22.mlp.w2.weight": "model-00006-of-00019.safetensors", - "transformer.h.23.attn.c_attn.bias": "model-00006-of-00019.safetensors", - "transformer.h.23.attn.c_attn.weight": "model-00006-of-00019.safetensors", - "transformer.h.23.attn.c_proj.weight": "model-00006-of-00019.safetensors", - "transformer.h.23.ln_1.weight": "model-00006-of-00019.safetensors", - "transformer.h.23.ln_2.weight": "model-00006-of-00019.safetensors", - "transformer.h.23.mlp.c_proj.weight": "model-00006-of-00019.safetensors", - "transformer.h.23.mlp.w1.weight": "model-00006-of-00019.safetensors", - "transformer.h.23.mlp.w2.weight": "model-00006-of-00019.safetensors", - "transformer.h.24.attn.c_attn.bias": "model-00006-of-00019.safetensors", - "transformer.h.24.attn.c_attn.weight": "model-00006-of-00019.safetensors", - "transformer.h.24.attn.c_proj.weight": "model-00006-of-00019.safetensors", - "transformer.h.24.ln_1.weight": "model-00006-of-00019.safetensors", - "transformer.h.24.ln_2.weight": "model-00006-of-00019.safetensors", - "transformer.h.24.mlp.c_proj.weight": "model-00006-of-00019.safetensors", - "transformer.h.24.mlp.w1.weight": "model-00006-of-00019.safetensors", - "transformer.h.24.mlp.w2.weight": "model-00006-of-00019.safetensors", - "transformer.h.25.attn.c_attn.bias": "model-00006-of-00019.safetensors", - "transformer.h.25.attn.c_attn.weight": "model-00006-of-00019.safetensors", - "transformer.h.25.attn.c_proj.weight": "model-00006-of-00019.safetensors", - "transformer.h.25.ln_1.weight": "model-00006-of-00019.safetensors", - "transformer.h.25.ln_2.weight": "model-00006-of-00019.safetensors", - "transformer.h.25.mlp.c_proj.weight": "model-00007-of-00019.safetensors", - "transformer.h.25.mlp.w1.weight": "model-00006-of-00019.safetensors", - "transformer.h.25.mlp.w2.weight": "model-00007-of-00019.safetensors", - "transformer.h.26.attn.c_attn.bias": "model-00007-of-00019.safetensors", - "transformer.h.26.attn.c_attn.weight": "model-00007-of-00019.safetensors", - "transformer.h.26.attn.c_proj.weight": "model-00007-of-00019.safetensors", - "transformer.h.26.ln_1.weight": "model-00007-of-00019.safetensors", - "transformer.h.26.ln_2.weight": "model-00007-of-00019.safetensors", - "transformer.h.26.mlp.c_proj.weight": "model-00007-of-00019.safetensors", - "transformer.h.26.mlp.w1.weight": "model-00007-of-00019.safetensors", - "transformer.h.26.mlp.w2.weight": "model-00007-of-00019.safetensors", - "transformer.h.27.attn.c_attn.bias": "model-00007-of-00019.safetensors", - "transformer.h.27.attn.c_attn.weight": "model-00007-of-00019.safetensors", - "transformer.h.27.attn.c_proj.weight": "model-00007-of-00019.safetensors", - "transformer.h.27.ln_1.weight": "model-00007-of-00019.safetensors", - "transformer.h.27.ln_2.weight": "model-00007-of-00019.safetensors", - "transformer.h.27.mlp.c_proj.weight": "model-00007-of-00019.safetensors", - "transformer.h.27.mlp.w1.weight": "model-00007-of-00019.safetensors", - "transformer.h.27.mlp.w2.weight": "model-00007-of-00019.safetensors", - "transformer.h.28.attn.c_attn.bias": "model-00007-of-00019.safetensors", - "transformer.h.28.attn.c_attn.weight": "model-00007-of-00019.safetensors", - "transformer.h.28.attn.c_proj.weight": "model-00007-of-00019.safetensors", - "transformer.h.28.ln_1.weight": "model-00007-of-00019.safetensors", - "transformer.h.28.ln_2.weight": "model-00007-of-00019.safetensors", - "transformer.h.28.mlp.c_proj.weight": "model-00007-of-00019.safetensors", - "transformer.h.28.mlp.w1.weight": "model-00007-of-00019.safetensors", - "transformer.h.28.mlp.w2.weight": "model-00007-of-00019.safetensors", - "transformer.h.29.attn.c_attn.bias": "model-00007-of-00019.safetensors", - "transformer.h.29.attn.c_attn.weight": "model-00007-of-00019.safetensors", - "transformer.h.29.attn.c_proj.weight": "model-00007-of-00019.safetensors", - "transformer.h.29.ln_1.weight": "model-00007-of-00019.safetensors", - "transformer.h.29.ln_2.weight": "model-00007-of-00019.safetensors", - "transformer.h.29.mlp.c_proj.weight": "model-00007-of-00019.safetensors", - "transformer.h.29.mlp.w1.weight": "model-00007-of-00019.safetensors", - "transformer.h.29.mlp.w2.weight": "model-00007-of-00019.safetensors", - "transformer.h.3.attn.c_attn.bias": "model-00002-of-00019.safetensors", - "transformer.h.3.attn.c_attn.weight": "model-00002-of-00019.safetensors", - "transformer.h.3.attn.c_proj.weight": "model-00002-of-00019.safetensors", - "transformer.h.3.ln_1.weight": "model-00001-of-00019.safetensors", - "transformer.h.3.ln_2.weight": "model-00002-of-00019.safetensors", - "transformer.h.3.mlp.c_proj.weight": "model-00002-of-00019.safetensors", - "transformer.h.3.mlp.w1.weight": "model-00002-of-00019.safetensors", - "transformer.h.3.mlp.w2.weight": "model-00002-of-00019.safetensors", - "transformer.h.30.attn.c_attn.bias": "model-00008-of-00019.safetensors", - "transformer.h.30.attn.c_attn.weight": "model-00008-of-00019.safetensors", - "transformer.h.30.attn.c_proj.weight": "model-00008-of-00019.safetensors", - "transformer.h.30.ln_1.weight": "model-00007-of-00019.safetensors", - "transformer.h.30.ln_2.weight": "model-00008-of-00019.safetensors", - "transformer.h.30.mlp.c_proj.weight": "model-00008-of-00019.safetensors", - "transformer.h.30.mlp.w1.weight": "model-00008-of-00019.safetensors", - "transformer.h.30.mlp.w2.weight": "model-00008-of-00019.safetensors", - "transformer.h.31.attn.c_attn.bias": "model-00008-of-00019.safetensors", - "transformer.h.31.attn.c_attn.weight": "model-00008-of-00019.safetensors", - "transformer.h.31.attn.c_proj.weight": "model-00008-of-00019.safetensors", - "transformer.h.31.ln_1.weight": "model-00008-of-00019.safetensors", - "transformer.h.31.ln_2.weight": "model-00008-of-00019.safetensors", - "transformer.h.31.mlp.c_proj.weight": "model-00008-of-00019.safetensors", - "transformer.h.31.mlp.w1.weight": "model-00008-of-00019.safetensors", - "transformer.h.31.mlp.w2.weight": "model-00008-of-00019.safetensors", - "transformer.h.32.attn.c_attn.bias": "model-00008-of-00019.safetensors", - "transformer.h.32.attn.c_attn.weight": "model-00008-of-00019.safetensors", - "transformer.h.32.attn.c_proj.weight": "model-00008-of-00019.safetensors", - "transformer.h.32.ln_1.weight": "model-00008-of-00019.safetensors", - "transformer.h.32.ln_2.weight": "model-00008-of-00019.safetensors", - "transformer.h.32.mlp.c_proj.weight": "model-00008-of-00019.safetensors", - "transformer.h.32.mlp.w1.weight": "model-00008-of-00019.safetensors", - "transformer.h.32.mlp.w2.weight": "model-00008-of-00019.safetensors", - "transformer.h.33.attn.c_attn.bias": "model-00008-of-00019.safetensors", - "transformer.h.33.attn.c_attn.weight": "model-00008-of-00019.safetensors", - "transformer.h.33.attn.c_proj.weight": "model-00008-of-00019.safetensors", - "transformer.h.33.ln_1.weight": "model-00008-of-00019.safetensors", - "transformer.h.33.ln_2.weight": "model-00008-of-00019.safetensors", - "transformer.h.33.mlp.c_proj.weight": "model-00008-of-00019.safetensors", - "transformer.h.33.mlp.w1.weight": "model-00008-of-00019.safetensors", - "transformer.h.33.mlp.w2.weight": "model-00008-of-00019.safetensors", - "transformer.h.34.attn.c_attn.bias": "model-00008-of-00019.safetensors", - "transformer.h.34.attn.c_attn.weight": "model-00008-of-00019.safetensors", - "transformer.h.34.attn.c_proj.weight": "model-00008-of-00019.safetensors", - "transformer.h.34.ln_1.weight": "model-00008-of-00019.safetensors", - "transformer.h.34.ln_2.weight": "model-00008-of-00019.safetensors", - "transformer.h.34.mlp.c_proj.weight": "model-00009-of-00019.safetensors", - "transformer.h.34.mlp.w1.weight": "model-00008-of-00019.safetensors", - "transformer.h.34.mlp.w2.weight": "model-00009-of-00019.safetensors", - "transformer.h.35.attn.c_attn.bias": "model-00009-of-00019.safetensors", - "transformer.h.35.attn.c_attn.weight": "model-00009-of-00019.safetensors", - "transformer.h.35.attn.c_proj.weight": "model-00009-of-00019.safetensors", - "transformer.h.35.ln_1.weight": "model-00009-of-00019.safetensors", - "transformer.h.35.ln_2.weight": "model-00009-of-00019.safetensors", - "transformer.h.35.mlp.c_proj.weight": "model-00009-of-00019.safetensors", - "transformer.h.35.mlp.w1.weight": "model-00009-of-00019.safetensors", - "transformer.h.35.mlp.w2.weight": "model-00009-of-00019.safetensors", - "transformer.h.36.attn.c_attn.bias": "model-00009-of-00019.safetensors", - "transformer.h.36.attn.c_attn.weight": "model-00009-of-00019.safetensors", - "transformer.h.36.attn.c_proj.weight": "model-00009-of-00019.safetensors", - "transformer.h.36.ln_1.weight": "model-00009-of-00019.safetensors", - "transformer.h.36.ln_2.weight": "model-00009-of-00019.safetensors", - "transformer.h.36.mlp.c_proj.weight": "model-00009-of-00019.safetensors", - "transformer.h.36.mlp.w1.weight": "model-00009-of-00019.safetensors", - "transformer.h.36.mlp.w2.weight": "model-00009-of-00019.safetensors", - "transformer.h.37.attn.c_attn.bias": "model-00009-of-00019.safetensors", - "transformer.h.37.attn.c_attn.weight": "model-00009-of-00019.safetensors", - "transformer.h.37.attn.c_proj.weight": "model-00009-of-00019.safetensors", - "transformer.h.37.ln_1.weight": "model-00009-of-00019.safetensors", - "transformer.h.37.ln_2.weight": "model-00009-of-00019.safetensors", - "transformer.h.37.mlp.c_proj.weight": "model-00009-of-00019.safetensors", - "transformer.h.37.mlp.w1.weight": "model-00009-of-00019.safetensors", - "transformer.h.37.mlp.w2.weight": "model-00009-of-00019.safetensors", - "transformer.h.38.attn.c_attn.bias": "model-00009-of-00019.safetensors", - "transformer.h.38.attn.c_attn.weight": "model-00009-of-00019.safetensors", - "transformer.h.38.attn.c_proj.weight": "model-00009-of-00019.safetensors", - "transformer.h.38.ln_1.weight": "model-00009-of-00019.safetensors", - "transformer.h.38.ln_2.weight": "model-00009-of-00019.safetensors", - "transformer.h.38.mlp.c_proj.weight": "model-00009-of-00019.safetensors", - "transformer.h.38.mlp.w1.weight": "model-00009-of-00019.safetensors", - "transformer.h.38.mlp.w2.weight": "model-00009-of-00019.safetensors", - "transformer.h.39.attn.c_attn.bias": "model-00010-of-00019.safetensors", - "transformer.h.39.attn.c_attn.weight": "model-00010-of-00019.safetensors", - "transformer.h.39.attn.c_proj.weight": "model-00010-of-00019.safetensors", - "transformer.h.39.ln_1.weight": "model-00009-of-00019.safetensors", - "transformer.h.39.ln_2.weight": "model-00010-of-00019.safetensors", - "transformer.h.39.mlp.c_proj.weight": "model-00010-of-00019.safetensors", - "transformer.h.39.mlp.w1.weight": "model-00010-of-00019.safetensors", - "transformer.h.39.mlp.w2.weight": "model-00010-of-00019.safetensors", - "transformer.h.4.attn.c_attn.bias": "model-00002-of-00019.safetensors", - "transformer.h.4.attn.c_attn.weight": "model-00002-of-00019.safetensors", - "transformer.h.4.attn.c_proj.weight": "model-00002-of-00019.safetensors", - "transformer.h.4.ln_1.weight": "model-00002-of-00019.safetensors", - "transformer.h.4.ln_2.weight": "model-00002-of-00019.safetensors", - "transformer.h.4.mlp.c_proj.weight": "model-00002-of-00019.safetensors", - "transformer.h.4.mlp.w1.weight": "model-00002-of-00019.safetensors", - "transformer.h.4.mlp.w2.weight": "model-00002-of-00019.safetensors", - "transformer.h.40.attn.c_attn.bias": "model-00010-of-00019.safetensors", - "transformer.h.40.attn.c_attn.weight": "model-00010-of-00019.safetensors", - "transformer.h.40.attn.c_proj.weight": "model-00010-of-00019.safetensors", - "transformer.h.40.ln_1.weight": "model-00010-of-00019.safetensors", - "transformer.h.40.ln_2.weight": "model-00010-of-00019.safetensors", - "transformer.h.40.mlp.c_proj.weight": "model-00010-of-00019.safetensors", - "transformer.h.40.mlp.w1.weight": "model-00010-of-00019.safetensors", - "transformer.h.40.mlp.w2.weight": "model-00010-of-00019.safetensors", - "transformer.h.41.attn.c_attn.bias": "model-00010-of-00019.safetensors", - "transformer.h.41.attn.c_attn.weight": "model-00010-of-00019.safetensors", - "transformer.h.41.attn.c_proj.weight": "model-00010-of-00019.safetensors", - "transformer.h.41.ln_1.weight": "model-00010-of-00019.safetensors", - "transformer.h.41.ln_2.weight": "model-00010-of-00019.safetensors", - "transformer.h.41.mlp.c_proj.weight": "model-00010-of-00019.safetensors", - "transformer.h.41.mlp.w1.weight": "model-00010-of-00019.safetensors", - "transformer.h.41.mlp.w2.weight": "model-00010-of-00019.safetensors", - "transformer.h.42.attn.c_attn.bias": "model-00010-of-00019.safetensors", - "transformer.h.42.attn.c_attn.weight": "model-00010-of-00019.safetensors", - "transformer.h.42.attn.c_proj.weight": "model-00010-of-00019.safetensors", - "transformer.h.42.ln_1.weight": "model-00010-of-00019.safetensors", - "transformer.h.42.ln_2.weight": "model-00010-of-00019.safetensors", - "transformer.h.42.mlp.c_proj.weight": "model-00010-of-00019.safetensors", - "transformer.h.42.mlp.w1.weight": "model-00010-of-00019.safetensors", - "transformer.h.42.mlp.w2.weight": "model-00010-of-00019.safetensors", - "transformer.h.43.attn.c_attn.bias": "model-00010-of-00019.safetensors", - "transformer.h.43.attn.c_attn.weight": "model-00010-of-00019.safetensors", - "transformer.h.43.attn.c_proj.weight": "model-00010-of-00019.safetensors", - "transformer.h.43.ln_1.weight": "model-00010-of-00019.safetensors", - "transformer.h.43.ln_2.weight": "model-00010-of-00019.safetensors", - "transformer.h.43.mlp.c_proj.weight": "model-00011-of-00019.safetensors", - "transformer.h.43.mlp.w1.weight": "model-00010-of-00019.safetensors", - "transformer.h.43.mlp.w2.weight": "model-00011-of-00019.safetensors", - "transformer.h.44.attn.c_attn.bias": "model-00011-of-00019.safetensors", - "transformer.h.44.attn.c_attn.weight": "model-00011-of-00019.safetensors", - "transformer.h.44.attn.c_proj.weight": "model-00011-of-00019.safetensors", - "transformer.h.44.ln_1.weight": "model-00011-of-00019.safetensors", - "transformer.h.44.ln_2.weight": "model-00011-of-00019.safetensors", - "transformer.h.44.mlp.c_proj.weight": "model-00011-of-00019.safetensors", - "transformer.h.44.mlp.w1.weight": "model-00011-of-00019.safetensors", - "transformer.h.44.mlp.w2.weight": "model-00011-of-00019.safetensors", - "transformer.h.45.attn.c_attn.bias": "model-00011-of-00019.safetensors", - "transformer.h.45.attn.c_attn.weight": "model-00011-of-00019.safetensors", - "transformer.h.45.attn.c_proj.weight": "model-00011-of-00019.safetensors", - "transformer.h.45.ln_1.weight": "model-00011-of-00019.safetensors", - "transformer.h.45.ln_2.weight": "model-00011-of-00019.safetensors", - "transformer.h.45.mlp.c_proj.weight": "model-00011-of-00019.safetensors", - "transformer.h.45.mlp.w1.weight": "model-00011-of-00019.safetensors", - "transformer.h.45.mlp.w2.weight": "model-00011-of-00019.safetensors", - "transformer.h.46.attn.c_attn.bias": "model-00011-of-00019.safetensors", - "transformer.h.46.attn.c_attn.weight": "model-00011-of-00019.safetensors", - "transformer.h.46.attn.c_proj.weight": "model-00011-of-00019.safetensors", - "transformer.h.46.ln_1.weight": "model-00011-of-00019.safetensors", - "transformer.h.46.ln_2.weight": "model-00011-of-00019.safetensors", - "transformer.h.46.mlp.c_proj.weight": "model-00011-of-00019.safetensors", - "transformer.h.46.mlp.w1.weight": "model-00011-of-00019.safetensors", - "transformer.h.46.mlp.w2.weight": "model-00011-of-00019.safetensors", - "transformer.h.47.attn.c_attn.bias": "model-00011-of-00019.safetensors", - "transformer.h.47.attn.c_attn.weight": "model-00011-of-00019.safetensors", - "transformer.h.47.attn.c_proj.weight": "model-00011-of-00019.safetensors", - "transformer.h.47.ln_1.weight": "model-00011-of-00019.safetensors", - "transformer.h.47.ln_2.weight": "model-00011-of-00019.safetensors", - "transformer.h.47.mlp.c_proj.weight": "model-00011-of-00019.safetensors", - "transformer.h.47.mlp.w1.weight": "model-00011-of-00019.safetensors", - "transformer.h.47.mlp.w2.weight": "model-00011-of-00019.safetensors", - "transformer.h.48.attn.c_attn.bias": "model-00012-of-00019.safetensors", - "transformer.h.48.attn.c_attn.weight": "model-00012-of-00019.safetensors", - "transformer.h.48.attn.c_proj.weight": "model-00012-of-00019.safetensors", - "transformer.h.48.ln_1.weight": "model-00011-of-00019.safetensors", - "transformer.h.48.ln_2.weight": "model-00012-of-00019.safetensors", - "transformer.h.48.mlp.c_proj.weight": "model-00012-of-00019.safetensors", - "transformer.h.48.mlp.w1.weight": "model-00012-of-00019.safetensors", - "transformer.h.48.mlp.w2.weight": "model-00012-of-00019.safetensors", - "transformer.h.49.attn.c_attn.bias": "model-00012-of-00019.safetensors", - "transformer.h.49.attn.c_attn.weight": "model-00012-of-00019.safetensors", - "transformer.h.49.attn.c_proj.weight": "model-00012-of-00019.safetensors", - "transformer.h.49.ln_1.weight": "model-00012-of-00019.safetensors", - "transformer.h.49.ln_2.weight": "model-00012-of-00019.safetensors", - "transformer.h.49.mlp.c_proj.weight": "model-00012-of-00019.safetensors", - "transformer.h.49.mlp.w1.weight": "model-00012-of-00019.safetensors", - "transformer.h.49.mlp.w2.weight": "model-00012-of-00019.safetensors", - "transformer.h.5.attn.c_attn.bias": "model-00002-of-00019.safetensors", - "transformer.h.5.attn.c_attn.weight": "model-00002-of-00019.safetensors", - "transformer.h.5.attn.c_proj.weight": "model-00002-of-00019.safetensors", - "transformer.h.5.ln_1.weight": "model-00002-of-00019.safetensors", - "transformer.h.5.ln_2.weight": "model-00002-of-00019.safetensors", - "transformer.h.5.mlp.c_proj.weight": "model-00002-of-00019.safetensors", - "transformer.h.5.mlp.w1.weight": "model-00002-of-00019.safetensors", - "transformer.h.5.mlp.w2.weight": "model-00002-of-00019.safetensors", - "transformer.h.50.attn.c_attn.bias": "model-00012-of-00019.safetensors", - "transformer.h.50.attn.c_attn.weight": "model-00012-of-00019.safetensors", - "transformer.h.50.attn.c_proj.weight": "model-00012-of-00019.safetensors", - "transformer.h.50.ln_1.weight": "model-00012-of-00019.safetensors", - "transformer.h.50.ln_2.weight": "model-00012-of-00019.safetensors", - "transformer.h.50.mlp.c_proj.weight": "model-00012-of-00019.safetensors", - "transformer.h.50.mlp.w1.weight": "model-00012-of-00019.safetensors", - "transformer.h.50.mlp.w2.weight": "model-00012-of-00019.safetensors", - "transformer.h.51.attn.c_attn.bias": "model-00012-of-00019.safetensors", - "transformer.h.51.attn.c_attn.weight": "model-00012-of-00019.safetensors", - "transformer.h.51.attn.c_proj.weight": "model-00012-of-00019.safetensors", - "transformer.h.51.ln_1.weight": "model-00012-of-00019.safetensors", - "transformer.h.51.ln_2.weight": "model-00012-of-00019.safetensors", - "transformer.h.51.mlp.c_proj.weight": "model-00012-of-00019.safetensors", - "transformer.h.51.mlp.w1.weight": "model-00012-of-00019.safetensors", - "transformer.h.51.mlp.w2.weight": "model-00012-of-00019.safetensors", - "transformer.h.52.attn.c_attn.bias": "model-00012-of-00019.safetensors", - "transformer.h.52.attn.c_attn.weight": "model-00012-of-00019.safetensors", - "transformer.h.52.attn.c_proj.weight": "model-00012-of-00019.safetensors", - "transformer.h.52.ln_1.weight": "model-00012-of-00019.safetensors", - "transformer.h.52.ln_2.weight": "model-00012-of-00019.safetensors", - "transformer.h.52.mlp.c_proj.weight": "model-00013-of-00019.safetensors", - "transformer.h.52.mlp.w1.weight": "model-00012-of-00019.safetensors", - "transformer.h.52.mlp.w2.weight": "model-00013-of-00019.safetensors", - "transformer.h.53.attn.c_attn.bias": "model-00013-of-00019.safetensors", - "transformer.h.53.attn.c_attn.weight": "model-00013-of-00019.safetensors", - "transformer.h.53.attn.c_proj.weight": "model-00013-of-00019.safetensors", - "transformer.h.53.ln_1.weight": "model-00013-of-00019.safetensors", - "transformer.h.53.ln_2.weight": "model-00013-of-00019.safetensors", - "transformer.h.53.mlp.c_proj.weight": "model-00013-of-00019.safetensors", - "transformer.h.53.mlp.w1.weight": "model-00013-of-00019.safetensors", - "transformer.h.53.mlp.w2.weight": "model-00013-of-00019.safetensors", - "transformer.h.54.attn.c_attn.bias": "model-00013-of-00019.safetensors", - "transformer.h.54.attn.c_attn.weight": "model-00013-of-00019.safetensors", - "transformer.h.54.attn.c_proj.weight": "model-00013-of-00019.safetensors", - "transformer.h.54.ln_1.weight": "model-00013-of-00019.safetensors", - "transformer.h.54.ln_2.weight": "model-00013-of-00019.safetensors", - "transformer.h.54.mlp.c_proj.weight": "model-00013-of-00019.safetensors", - "transformer.h.54.mlp.w1.weight": "model-00013-of-00019.safetensors", - "transformer.h.54.mlp.w2.weight": "model-00013-of-00019.safetensors", - "transformer.h.55.attn.c_attn.bias": "model-00013-of-00019.safetensors", - "transformer.h.55.attn.c_attn.weight": "model-00013-of-00019.safetensors", - "transformer.h.55.attn.c_proj.weight": "model-00013-of-00019.safetensors", - "transformer.h.55.ln_1.weight": "model-00013-of-00019.safetensors", - "transformer.h.55.ln_2.weight": "model-00013-of-00019.safetensors", - "transformer.h.55.mlp.c_proj.weight": "model-00013-of-00019.safetensors", - "transformer.h.55.mlp.w1.weight": "model-00013-of-00019.safetensors", - "transformer.h.55.mlp.w2.weight": "model-00013-of-00019.safetensors", - "transformer.h.56.attn.c_attn.bias": "model-00013-of-00019.safetensors", - "transformer.h.56.attn.c_attn.weight": "model-00013-of-00019.safetensors", - "transformer.h.56.attn.c_proj.weight": "model-00013-of-00019.safetensors", - "transformer.h.56.ln_1.weight": "model-00013-of-00019.safetensors", - "transformer.h.56.ln_2.weight": "model-00013-of-00019.safetensors", - "transformer.h.56.mlp.c_proj.weight": "model-00013-of-00019.safetensors", - "transformer.h.56.mlp.w1.weight": "model-00013-of-00019.safetensors", - "transformer.h.56.mlp.w2.weight": "model-00013-of-00019.safetensors", - "transformer.h.57.attn.c_attn.bias": "model-00014-of-00019.safetensors", - "transformer.h.57.attn.c_attn.weight": "model-00014-of-00019.safetensors", - "transformer.h.57.attn.c_proj.weight": "model-00014-of-00019.safetensors", - "transformer.h.57.ln_1.weight": "model-00013-of-00019.safetensors", - "transformer.h.57.ln_2.weight": "model-00014-of-00019.safetensors", - "transformer.h.57.mlp.c_proj.weight": "model-00014-of-00019.safetensors", - "transformer.h.57.mlp.w1.weight": "model-00014-of-00019.safetensors", - "transformer.h.57.mlp.w2.weight": "model-00014-of-00019.safetensors", - "transformer.h.58.attn.c_attn.bias": "model-00014-of-00019.safetensors", - "transformer.h.58.attn.c_attn.weight": "model-00014-of-00019.safetensors", - "transformer.h.58.attn.c_proj.weight": "model-00014-of-00019.safetensors", - "transformer.h.58.ln_1.weight": "model-00014-of-00019.safetensors", - "transformer.h.58.ln_2.weight": "model-00014-of-00019.safetensors", - "transformer.h.58.mlp.c_proj.weight": "model-00014-of-00019.safetensors", - "transformer.h.58.mlp.w1.weight": "model-00014-of-00019.safetensors", - "transformer.h.58.mlp.w2.weight": "model-00014-of-00019.safetensors", - "transformer.h.59.attn.c_attn.bias": "model-00014-of-00019.safetensors", - "transformer.h.59.attn.c_attn.weight": "model-00014-of-00019.safetensors", - "transformer.h.59.attn.c_proj.weight": "model-00014-of-00019.safetensors", - "transformer.h.59.ln_1.weight": "model-00014-of-00019.safetensors", - "transformer.h.59.ln_2.weight": "model-00014-of-00019.safetensors", - "transformer.h.59.mlp.c_proj.weight": "model-00014-of-00019.safetensors", - "transformer.h.59.mlp.w1.weight": "model-00014-of-00019.safetensors", - "transformer.h.59.mlp.w2.weight": "model-00014-of-00019.safetensors", - "transformer.h.6.attn.c_attn.bias": "model-00002-of-00019.safetensors", - "transformer.h.6.attn.c_attn.weight": "model-00002-of-00019.safetensors", - "transformer.h.6.attn.c_proj.weight": "model-00002-of-00019.safetensors", - "transformer.h.6.ln_1.weight": "model-00002-of-00019.safetensors", - "transformer.h.6.ln_2.weight": "model-00002-of-00019.safetensors", - "transformer.h.6.mlp.c_proj.weight": "model-00002-of-00019.safetensors", - "transformer.h.6.mlp.w1.weight": "model-00002-of-00019.safetensors", - "transformer.h.6.mlp.w2.weight": "model-00002-of-00019.safetensors", - "transformer.h.60.attn.c_attn.bias": "model-00014-of-00019.safetensors", - "transformer.h.60.attn.c_attn.weight": "model-00014-of-00019.safetensors", - "transformer.h.60.attn.c_proj.weight": "model-00014-of-00019.safetensors", - "transformer.h.60.ln_1.weight": "model-00014-of-00019.safetensors", - "transformer.h.60.ln_2.weight": "model-00014-of-00019.safetensors", - "transformer.h.60.mlp.c_proj.weight": "model-00014-of-00019.safetensors", - "transformer.h.60.mlp.w1.weight": "model-00014-of-00019.safetensors", - "transformer.h.60.mlp.w2.weight": "model-00014-of-00019.safetensors", - "transformer.h.61.attn.c_attn.bias": "model-00014-of-00019.safetensors", - "transformer.h.61.attn.c_attn.weight": "model-00014-of-00019.safetensors", - "transformer.h.61.attn.c_proj.weight": "model-00014-of-00019.safetensors", - "transformer.h.61.ln_1.weight": "model-00014-of-00019.safetensors", - "transformer.h.61.ln_2.weight": "model-00014-of-00019.safetensors", - "transformer.h.61.mlp.c_proj.weight": "model-00015-of-00019.safetensors", - "transformer.h.61.mlp.w1.weight": "model-00014-of-00019.safetensors", - "transformer.h.61.mlp.w2.weight": "model-00015-of-00019.safetensors", - "transformer.h.62.attn.c_attn.bias": "model-00015-of-00019.safetensors", - "transformer.h.62.attn.c_attn.weight": "model-00015-of-00019.safetensors", - "transformer.h.62.attn.c_proj.weight": "model-00015-of-00019.safetensors", - "transformer.h.62.ln_1.weight": "model-00015-of-00019.safetensors", - "transformer.h.62.ln_2.weight": "model-00015-of-00019.safetensors", - "transformer.h.62.mlp.c_proj.weight": "model-00015-of-00019.safetensors", - "transformer.h.62.mlp.w1.weight": "model-00015-of-00019.safetensors", - "transformer.h.62.mlp.w2.weight": "model-00015-of-00019.safetensors", - "transformer.h.63.attn.c_attn.bias": "model-00015-of-00019.safetensors", - "transformer.h.63.attn.c_attn.weight": "model-00015-of-00019.safetensors", - "transformer.h.63.attn.c_proj.weight": "model-00015-of-00019.safetensors", - "transformer.h.63.ln_1.weight": "model-00015-of-00019.safetensors", - "transformer.h.63.ln_2.weight": "model-00015-of-00019.safetensors", - "transformer.h.63.mlp.c_proj.weight": "model-00015-of-00019.safetensors", - "transformer.h.63.mlp.w1.weight": "model-00015-of-00019.safetensors", - "transformer.h.63.mlp.w2.weight": "model-00015-of-00019.safetensors", - "transformer.h.64.attn.c_attn.bias": "model-00015-of-00019.safetensors", - "transformer.h.64.attn.c_attn.weight": "model-00015-of-00019.safetensors", - "transformer.h.64.attn.c_proj.weight": "model-00015-of-00019.safetensors", - "transformer.h.64.ln_1.weight": "model-00015-of-00019.safetensors", - "transformer.h.64.ln_2.weight": "model-00015-of-00019.safetensors", - "transformer.h.64.mlp.c_proj.weight": "model-00015-of-00019.safetensors", - "transformer.h.64.mlp.w1.weight": "model-00015-of-00019.safetensors", - "transformer.h.64.mlp.w2.weight": "model-00015-of-00019.safetensors", - "transformer.h.65.attn.c_attn.bias": "model-00015-of-00019.safetensors", - "transformer.h.65.attn.c_attn.weight": "model-00015-of-00019.safetensors", - "transformer.h.65.attn.c_proj.weight": "model-00015-of-00019.safetensors", - "transformer.h.65.ln_1.weight": "model-00015-of-00019.safetensors", - "transformer.h.65.ln_2.weight": "model-00015-of-00019.safetensors", - "transformer.h.65.mlp.c_proj.weight": "model-00015-of-00019.safetensors", - "transformer.h.65.mlp.w1.weight": "model-00015-of-00019.safetensors", - "transformer.h.65.mlp.w2.weight": "model-00015-of-00019.safetensors", - "transformer.h.66.attn.c_attn.bias": "model-00016-of-00019.safetensors", - "transformer.h.66.attn.c_attn.weight": "model-00016-of-00019.safetensors", - "transformer.h.66.attn.c_proj.weight": "model-00016-of-00019.safetensors", - "transformer.h.66.ln_1.weight": "model-00015-of-00019.safetensors", - "transformer.h.66.ln_2.weight": "model-00016-of-00019.safetensors", - "transformer.h.66.mlp.c_proj.weight": "model-00016-of-00019.safetensors", - "transformer.h.66.mlp.w1.weight": "model-00016-of-00019.safetensors", - "transformer.h.66.mlp.w2.weight": "model-00016-of-00019.safetensors", - "transformer.h.67.attn.c_attn.bias": "model-00016-of-00019.safetensors", - "transformer.h.67.attn.c_attn.weight": "model-00016-of-00019.safetensors", - "transformer.h.67.attn.c_proj.weight": "model-00016-of-00019.safetensors", - "transformer.h.67.ln_1.weight": "model-00016-of-00019.safetensors", - "transformer.h.67.ln_2.weight": "model-00016-of-00019.safetensors", - "transformer.h.67.mlp.c_proj.weight": "model-00016-of-00019.safetensors", - "transformer.h.67.mlp.w1.weight": "model-00016-of-00019.safetensors", - "transformer.h.67.mlp.w2.weight": "model-00016-of-00019.safetensors", - "transformer.h.68.attn.c_attn.bias": "model-00016-of-00019.safetensors", - "transformer.h.68.attn.c_attn.weight": "model-00016-of-00019.safetensors", - "transformer.h.68.attn.c_proj.weight": "model-00016-of-00019.safetensors", - "transformer.h.68.ln_1.weight": "model-00016-of-00019.safetensors", - "transformer.h.68.ln_2.weight": "model-00016-of-00019.safetensors", - "transformer.h.68.mlp.c_proj.weight": "model-00016-of-00019.safetensors", - "transformer.h.68.mlp.w1.weight": "model-00016-of-00019.safetensors", - "transformer.h.68.mlp.w2.weight": "model-00016-of-00019.safetensors", - "transformer.h.69.attn.c_attn.bias": "model-00016-of-00019.safetensors", - "transformer.h.69.attn.c_attn.weight": "model-00016-of-00019.safetensors", - "transformer.h.69.attn.c_proj.weight": "model-00016-of-00019.safetensors", - "transformer.h.69.ln_1.weight": "model-00016-of-00019.safetensors", - "transformer.h.69.ln_2.weight": "model-00016-of-00019.safetensors", - "transformer.h.69.mlp.c_proj.weight": "model-00016-of-00019.safetensors", - "transformer.h.69.mlp.w1.weight": "model-00016-of-00019.safetensors", - "transformer.h.69.mlp.w2.weight": "model-00016-of-00019.safetensors", - "transformer.h.7.attn.c_attn.bias": "model-00002-of-00019.safetensors", - "transformer.h.7.attn.c_attn.weight": "model-00002-of-00019.safetensors", - "transformer.h.7.attn.c_proj.weight": "model-00002-of-00019.safetensors", - "transformer.h.7.ln_1.weight": "model-00002-of-00019.safetensors", - "transformer.h.7.ln_2.weight": "model-00002-of-00019.safetensors", - "transformer.h.7.mlp.c_proj.weight": "model-00003-of-00019.safetensors", - "transformer.h.7.mlp.w1.weight": "model-00002-of-00019.safetensors", - "transformer.h.7.mlp.w2.weight": "model-00003-of-00019.safetensors", - "transformer.h.70.attn.c_attn.bias": "model-00016-of-00019.safetensors", - "transformer.h.70.attn.c_attn.weight": "model-00016-of-00019.safetensors", - "transformer.h.70.attn.c_proj.weight": "model-00016-of-00019.safetensors", - "transformer.h.70.ln_1.weight": "model-00016-of-00019.safetensors", - "transformer.h.70.ln_2.weight": "model-00016-of-00019.safetensors", - "transformer.h.70.mlp.c_proj.weight": "model-00017-of-00019.safetensors", - "transformer.h.70.mlp.w1.weight": "model-00016-of-00019.safetensors", - "transformer.h.70.mlp.w2.weight": "model-00017-of-00019.safetensors", - "transformer.h.71.attn.c_attn.bias": "model-00017-of-00019.safetensors", - "transformer.h.71.attn.c_attn.weight": "model-00017-of-00019.safetensors", - "transformer.h.71.attn.c_proj.weight": "model-00017-of-00019.safetensors", - "transformer.h.71.ln_1.weight": "model-00017-of-00019.safetensors", - "transformer.h.71.ln_2.weight": "model-00017-of-00019.safetensors", - "transformer.h.71.mlp.c_proj.weight": "model-00017-of-00019.safetensors", - "transformer.h.71.mlp.w1.weight": "model-00017-of-00019.safetensors", - "transformer.h.71.mlp.w2.weight": "model-00017-of-00019.safetensors", - "transformer.h.72.attn.c_attn.bias": "model-00017-of-00019.safetensors", - "transformer.h.72.attn.c_attn.weight": "model-00017-of-00019.safetensors", - "transformer.h.72.attn.c_proj.weight": "model-00017-of-00019.safetensors", - "transformer.h.72.ln_1.weight": "model-00017-of-00019.safetensors", - "transformer.h.72.ln_2.weight": "model-00017-of-00019.safetensors", - "transformer.h.72.mlp.c_proj.weight": "model-00017-of-00019.safetensors", - "transformer.h.72.mlp.w1.weight": "model-00017-of-00019.safetensors", - "transformer.h.72.mlp.w2.weight": "model-00017-of-00019.safetensors", - "transformer.h.73.attn.c_attn.bias": "model-00017-of-00019.safetensors", - "transformer.h.73.attn.c_attn.weight": "model-00017-of-00019.safetensors", - "transformer.h.73.attn.c_proj.weight": "model-00017-of-00019.safetensors", - "transformer.h.73.ln_1.weight": "model-00017-of-00019.safetensors", - "transformer.h.73.ln_2.weight": "model-00017-of-00019.safetensors", - "transformer.h.73.mlp.c_proj.weight": "model-00017-of-00019.safetensors", - "transformer.h.73.mlp.w1.weight": "model-00017-of-00019.safetensors", - "transformer.h.73.mlp.w2.weight": "model-00017-of-00019.safetensors", - "transformer.h.74.attn.c_attn.bias": "model-00017-of-00019.safetensors", - "transformer.h.74.attn.c_attn.weight": "model-00017-of-00019.safetensors", - "transformer.h.74.attn.c_proj.weight": "model-00017-of-00019.safetensors", - "transformer.h.74.ln_1.weight": "model-00017-of-00019.safetensors", - "transformer.h.74.ln_2.weight": "model-00017-of-00019.safetensors", - "transformer.h.74.mlp.c_proj.weight": "model-00017-of-00019.safetensors", - "transformer.h.74.mlp.w1.weight": "model-00017-of-00019.safetensors", - "transformer.h.74.mlp.w2.weight": "model-00017-of-00019.safetensors", - "transformer.h.75.attn.c_attn.bias": "model-00018-of-00019.safetensors", - "transformer.h.75.attn.c_attn.weight": "model-00018-of-00019.safetensors", - "transformer.h.75.attn.c_proj.weight": "model-00018-of-00019.safetensors", - "transformer.h.75.ln_1.weight": "model-00017-of-00019.safetensors", - "transformer.h.75.ln_2.weight": "model-00018-of-00019.safetensors", - "transformer.h.75.mlp.c_proj.weight": "model-00018-of-00019.safetensors", - "transformer.h.75.mlp.w1.weight": "model-00018-of-00019.safetensors", - "transformer.h.75.mlp.w2.weight": "model-00018-of-00019.safetensors", - "transformer.h.76.attn.c_attn.bias": "model-00018-of-00019.safetensors", - "transformer.h.76.attn.c_attn.weight": "model-00018-of-00019.safetensors", - "transformer.h.76.attn.c_proj.weight": "model-00018-of-00019.safetensors", - "transformer.h.76.ln_1.weight": "model-00018-of-00019.safetensors", - "transformer.h.76.ln_2.weight": "model-00018-of-00019.safetensors", - "transformer.h.76.mlp.c_proj.weight": "model-00018-of-00019.safetensors", - "transformer.h.76.mlp.w1.weight": "model-00018-of-00019.safetensors", - "transformer.h.76.mlp.w2.weight": "model-00018-of-00019.safetensors", - "transformer.h.77.attn.c_attn.bias": "model-00018-of-00019.safetensors", - "transformer.h.77.attn.c_attn.weight": "model-00018-of-00019.safetensors", - "transformer.h.77.attn.c_proj.weight": "model-00018-of-00019.safetensors", - "transformer.h.77.ln_1.weight": "model-00018-of-00019.safetensors", - "transformer.h.77.ln_2.weight": "model-00018-of-00019.safetensors", - "transformer.h.77.mlp.c_proj.weight": "model-00018-of-00019.safetensors", - "transformer.h.77.mlp.w1.weight": "model-00018-of-00019.safetensors", - "transformer.h.77.mlp.w2.weight": "model-00018-of-00019.safetensors", - "transformer.h.78.attn.c_attn.bias": "model-00018-of-00019.safetensors", - "transformer.h.78.attn.c_attn.weight": "model-00018-of-00019.safetensors", - "transformer.h.78.attn.c_proj.weight": "model-00018-of-00019.safetensors", - "transformer.h.78.ln_1.weight": "model-00018-of-00019.safetensors", - "transformer.h.78.ln_2.weight": "model-00018-of-00019.safetensors", - "transformer.h.78.mlp.c_proj.weight": "model-00018-of-00019.safetensors", - "transformer.h.78.mlp.w1.weight": "model-00018-of-00019.safetensors", - "transformer.h.78.mlp.w2.weight": "model-00018-of-00019.safetensors", - "transformer.h.79.attn.c_attn.bias": "model-00018-of-00019.safetensors", - "transformer.h.79.attn.c_attn.weight": "model-00018-of-00019.safetensors", - "transformer.h.79.attn.c_proj.weight": "model-00018-of-00019.safetensors", - "transformer.h.79.ln_1.weight": "model-00018-of-00019.safetensors", - "transformer.h.79.ln_2.weight": "model-00018-of-00019.safetensors", - "transformer.h.79.mlp.c_proj.weight": "model-00019-of-00019.safetensors", - "transformer.h.79.mlp.w1.weight": "model-00018-of-00019.safetensors", - "transformer.h.79.mlp.w2.weight": "model-00019-of-00019.safetensors", - "transformer.h.8.attn.c_attn.bias": "model-00003-of-00019.safetensors", - "transformer.h.8.attn.c_attn.weight": "model-00003-of-00019.safetensors", - "transformer.h.8.attn.c_proj.weight": "model-00003-of-00019.safetensors", - "transformer.h.8.ln_1.weight": "model-00003-of-00019.safetensors", - "transformer.h.8.ln_2.weight": "model-00003-of-00019.safetensors", - "transformer.h.8.mlp.c_proj.weight": "model-00003-of-00019.safetensors", - "transformer.h.8.mlp.w1.weight": "model-00003-of-00019.safetensors", - "transformer.h.8.mlp.w2.weight": "model-00003-of-00019.safetensors", - "transformer.h.9.attn.c_attn.bias": "model-00003-of-00019.safetensors", - "transformer.h.9.attn.c_attn.weight": "model-00003-of-00019.safetensors", - "transformer.h.9.attn.c_proj.weight": "model-00003-of-00019.safetensors", - "transformer.h.9.ln_1.weight": "model-00003-of-00019.safetensors", - "transformer.h.9.ln_2.weight": "model-00003-of-00019.safetensors", - "transformer.h.9.mlp.c_proj.weight": "model-00003-of-00019.safetensors", - "transformer.h.9.mlp.w1.weight": "model-00003-of-00019.safetensors", - "transformer.h.9.mlp.w2.weight": "model-00003-of-00019.safetensors", - "transformer.ln_f.weight": "model-00019-of-00019.safetensors", - "transformer.wte.weight": "model-00001-of-00019.safetensors" + "lm_head.weight": "model-00082-of-00082.safetensors", + "transformer.h.0.attn.c_attn.bias": "model-00002-of-00082.safetensors", + "transformer.h.0.attn.c_attn.weight": "model-00002-of-00082.safetensors", + "transformer.h.0.attn.c_proj.weight": "model-00002-of-00082.safetensors", + "transformer.h.0.ln_1.weight": "model-00002-of-00082.safetensors", + "transformer.h.0.ln_2.weight": "model-00002-of-00082.safetensors", + "transformer.h.0.mlp.c_proj.weight": "model-00002-of-00082.safetensors", + "transformer.h.0.mlp.w1.weight": "model-00002-of-00082.safetensors", + "transformer.h.0.mlp.w2.weight": "model-00002-of-00082.safetensors", + "transformer.h.1.attn.c_attn.bias": "model-00003-of-00082.safetensors", + "transformer.h.1.attn.c_attn.weight": "model-00003-of-00082.safetensors", + "transformer.h.1.attn.c_proj.weight": "model-00003-of-00082.safetensors", + "transformer.h.1.ln_1.weight": "model-00002-of-00082.safetensors", + "transformer.h.1.ln_2.weight": "model-00003-of-00082.safetensors", + "transformer.h.1.mlp.c_proj.weight": "model-00003-of-00082.safetensors", + "transformer.h.1.mlp.w1.weight": "model-00003-of-00082.safetensors", + "transformer.h.1.mlp.w2.weight": "model-00003-of-00082.safetensors", + "transformer.h.10.attn.c_attn.bias": "model-00012-of-00082.safetensors", + "transformer.h.10.attn.c_attn.weight": "model-00012-of-00082.safetensors", + "transformer.h.10.attn.c_proj.weight": "model-00012-of-00082.safetensors", + "transformer.h.10.ln_1.weight": "model-00011-of-00082.safetensors", + "transformer.h.10.ln_2.weight": "model-00012-of-00082.safetensors", + "transformer.h.10.mlp.c_proj.weight": "model-00012-of-00082.safetensors", + "transformer.h.10.mlp.w1.weight": "model-00012-of-00082.safetensors", + "transformer.h.10.mlp.w2.weight": "model-00012-of-00082.safetensors", + "transformer.h.11.attn.c_attn.bias": "model-00013-of-00082.safetensors", + "transformer.h.11.attn.c_attn.weight": "model-00013-of-00082.safetensors", + "transformer.h.11.attn.c_proj.weight": "model-00013-of-00082.safetensors", + "transformer.h.11.ln_1.weight": "model-00012-of-00082.safetensors", + "transformer.h.11.ln_2.weight": "model-00013-of-00082.safetensors", + "transformer.h.11.mlp.c_proj.weight": "model-00013-of-00082.safetensors", + "transformer.h.11.mlp.w1.weight": "model-00013-of-00082.safetensors", + "transformer.h.11.mlp.w2.weight": "model-00013-of-00082.safetensors", + "transformer.h.12.attn.c_attn.bias": "model-00014-of-00082.safetensors", + "transformer.h.12.attn.c_attn.weight": "model-00014-of-00082.safetensors", + "transformer.h.12.attn.c_proj.weight": "model-00014-of-00082.safetensors", + "transformer.h.12.ln_1.weight": "model-00013-of-00082.safetensors", + "transformer.h.12.ln_2.weight": "model-00014-of-00082.safetensors", + "transformer.h.12.mlp.c_proj.weight": "model-00014-of-00082.safetensors", + "transformer.h.12.mlp.w1.weight": "model-00014-of-00082.safetensors", + "transformer.h.12.mlp.w2.weight": "model-00014-of-00082.safetensors", + "transformer.h.13.attn.c_attn.bias": "model-00015-of-00082.safetensors", + "transformer.h.13.attn.c_attn.weight": "model-00015-of-00082.safetensors", + "transformer.h.13.attn.c_proj.weight": "model-00015-of-00082.safetensors", + "transformer.h.13.ln_1.weight": "model-00014-of-00082.safetensors", + "transformer.h.13.ln_2.weight": "model-00015-of-00082.safetensors", + "transformer.h.13.mlp.c_proj.weight": "model-00015-of-00082.safetensors", + "transformer.h.13.mlp.w1.weight": "model-00015-of-00082.safetensors", + "transformer.h.13.mlp.w2.weight": "model-00015-of-00082.safetensors", + "transformer.h.14.attn.c_attn.bias": "model-00016-of-00082.safetensors", + "transformer.h.14.attn.c_attn.weight": "model-00016-of-00082.safetensors", + "transformer.h.14.attn.c_proj.weight": "model-00016-of-00082.safetensors", + "transformer.h.14.ln_1.weight": "model-00015-of-00082.safetensors", + "transformer.h.14.ln_2.weight": "model-00016-of-00082.safetensors", + "transformer.h.14.mlp.c_proj.weight": "model-00016-of-00082.safetensors", + "transformer.h.14.mlp.w1.weight": "model-00016-of-00082.safetensors", + "transformer.h.14.mlp.w2.weight": "model-00016-of-00082.safetensors", + "transformer.h.15.attn.c_attn.bias": "model-00017-of-00082.safetensors", + "transformer.h.15.attn.c_attn.weight": "model-00017-of-00082.safetensors", + "transformer.h.15.attn.c_proj.weight": "model-00017-of-00082.safetensors", + "transformer.h.15.ln_1.weight": "model-00016-of-00082.safetensors", + "transformer.h.15.ln_2.weight": "model-00017-of-00082.safetensors", + "transformer.h.15.mlp.c_proj.weight": "model-00017-of-00082.safetensors", + "transformer.h.15.mlp.w1.weight": "model-00017-of-00082.safetensors", + "transformer.h.15.mlp.w2.weight": "model-00017-of-00082.safetensors", + "transformer.h.16.attn.c_attn.bias": "model-00018-of-00082.safetensors", + "transformer.h.16.attn.c_attn.weight": "model-00018-of-00082.safetensors", + "transformer.h.16.attn.c_proj.weight": "model-00018-of-00082.safetensors", + "transformer.h.16.ln_1.weight": "model-00017-of-00082.safetensors", + "transformer.h.16.ln_2.weight": "model-00018-of-00082.safetensors", + "transformer.h.16.mlp.c_proj.weight": "model-00018-of-00082.safetensors", + "transformer.h.16.mlp.w1.weight": "model-00018-of-00082.safetensors", + "transformer.h.16.mlp.w2.weight": "model-00018-of-00082.safetensors", + "transformer.h.17.attn.c_attn.bias": "model-00019-of-00082.safetensors", + "transformer.h.17.attn.c_attn.weight": "model-00019-of-00082.safetensors", + "transformer.h.17.attn.c_proj.weight": "model-00019-of-00082.safetensors", + "transformer.h.17.ln_1.weight": "model-00018-of-00082.safetensors", + "transformer.h.17.ln_2.weight": "model-00019-of-00082.safetensors", + "transformer.h.17.mlp.c_proj.weight": "model-00019-of-00082.safetensors", + "transformer.h.17.mlp.w1.weight": "model-00019-of-00082.safetensors", + "transformer.h.17.mlp.w2.weight": "model-00019-of-00082.safetensors", + "transformer.h.18.attn.c_attn.bias": "model-00020-of-00082.safetensors", + "transformer.h.18.attn.c_attn.weight": "model-00020-of-00082.safetensors", + "transformer.h.18.attn.c_proj.weight": "model-00020-of-00082.safetensors", + "transformer.h.18.ln_1.weight": "model-00019-of-00082.safetensors", + "transformer.h.18.ln_2.weight": "model-00020-of-00082.safetensors", + "transformer.h.18.mlp.c_proj.weight": "model-00020-of-00082.safetensors", + "transformer.h.18.mlp.w1.weight": "model-00020-of-00082.safetensors", + "transformer.h.18.mlp.w2.weight": "model-00020-of-00082.safetensors", + "transformer.h.19.attn.c_attn.bias": "model-00021-of-00082.safetensors", + "transformer.h.19.attn.c_attn.weight": "model-00021-of-00082.safetensors", + "transformer.h.19.attn.c_proj.weight": "model-00021-of-00082.safetensors", + "transformer.h.19.ln_1.weight": "model-00020-of-00082.safetensors", + "transformer.h.19.ln_2.weight": "model-00021-of-00082.safetensors", + "transformer.h.19.mlp.c_proj.weight": "model-00021-of-00082.safetensors", + "transformer.h.19.mlp.w1.weight": "model-00021-of-00082.safetensors", + "transformer.h.19.mlp.w2.weight": "model-00021-of-00082.safetensors", + "transformer.h.2.attn.c_attn.bias": "model-00004-of-00082.safetensors", + "transformer.h.2.attn.c_attn.weight": "model-00004-of-00082.safetensors", + "transformer.h.2.attn.c_proj.weight": "model-00004-of-00082.safetensors", + "transformer.h.2.ln_1.weight": "model-00003-of-00082.safetensors", + "transformer.h.2.ln_2.weight": "model-00004-of-00082.safetensors", + "transformer.h.2.mlp.c_proj.weight": "model-00004-of-00082.safetensors", + "transformer.h.2.mlp.w1.weight": "model-00004-of-00082.safetensors", + "transformer.h.2.mlp.w2.weight": "model-00004-of-00082.safetensors", + "transformer.h.20.attn.c_attn.bias": "model-00022-of-00082.safetensors", + "transformer.h.20.attn.c_attn.weight": "model-00022-of-00082.safetensors", + "transformer.h.20.attn.c_proj.weight": "model-00022-of-00082.safetensors", + "transformer.h.20.ln_1.weight": "model-00021-of-00082.safetensors", + "transformer.h.20.ln_2.weight": "model-00022-of-00082.safetensors", + "transformer.h.20.mlp.c_proj.weight": "model-00022-of-00082.safetensors", + "transformer.h.20.mlp.w1.weight": "model-00022-of-00082.safetensors", + "transformer.h.20.mlp.w2.weight": "model-00022-of-00082.safetensors", + "transformer.h.21.attn.c_attn.bias": "model-00023-of-00082.safetensors", + "transformer.h.21.attn.c_attn.weight": "model-00023-of-00082.safetensors", + "transformer.h.21.attn.c_proj.weight": "model-00023-of-00082.safetensors", + "transformer.h.21.ln_1.weight": "model-00022-of-00082.safetensors", + "transformer.h.21.ln_2.weight": "model-00023-of-00082.safetensors", + "transformer.h.21.mlp.c_proj.weight": "model-00023-of-00082.safetensors", + "transformer.h.21.mlp.w1.weight": "model-00023-of-00082.safetensors", + "transformer.h.21.mlp.w2.weight": "model-00023-of-00082.safetensors", + "transformer.h.22.attn.c_attn.bias": "model-00024-of-00082.safetensors", + "transformer.h.22.attn.c_attn.weight": "model-00024-of-00082.safetensors", + "transformer.h.22.attn.c_proj.weight": "model-00024-of-00082.safetensors", + "transformer.h.22.ln_1.weight": "model-00023-of-00082.safetensors", + "transformer.h.22.ln_2.weight": "model-00024-of-00082.safetensors", + "transformer.h.22.mlp.c_proj.weight": "model-00024-of-00082.safetensors", + "transformer.h.22.mlp.w1.weight": "model-00024-of-00082.safetensors", + "transformer.h.22.mlp.w2.weight": "model-00024-of-00082.safetensors", + "transformer.h.23.attn.c_attn.bias": "model-00025-of-00082.safetensors", + "transformer.h.23.attn.c_attn.weight": "model-00025-of-00082.safetensors", + "transformer.h.23.attn.c_proj.weight": "model-00025-of-00082.safetensors", + "transformer.h.23.ln_1.weight": "model-00024-of-00082.safetensors", + "transformer.h.23.ln_2.weight": "model-00025-of-00082.safetensors", + "transformer.h.23.mlp.c_proj.weight": "model-00025-of-00082.safetensors", + "transformer.h.23.mlp.w1.weight": "model-00025-of-00082.safetensors", + "transformer.h.23.mlp.w2.weight": "model-00025-of-00082.safetensors", + "transformer.h.24.attn.c_attn.bias": "model-00026-of-00082.safetensors", + "transformer.h.24.attn.c_attn.weight": "model-00026-of-00082.safetensors", + "transformer.h.24.attn.c_proj.weight": "model-00026-of-00082.safetensors", + "transformer.h.24.ln_1.weight": "model-00025-of-00082.safetensors", + "transformer.h.24.ln_2.weight": "model-00026-of-00082.safetensors", + "transformer.h.24.mlp.c_proj.weight": "model-00026-of-00082.safetensors", + "transformer.h.24.mlp.w1.weight": "model-00026-of-00082.safetensors", + "transformer.h.24.mlp.w2.weight": "model-00026-of-00082.safetensors", + "transformer.h.25.attn.c_attn.bias": "model-00027-of-00082.safetensors", + "transformer.h.25.attn.c_attn.weight": "model-00027-of-00082.safetensors", + "transformer.h.25.attn.c_proj.weight": "model-00027-of-00082.safetensors", + "transformer.h.25.ln_1.weight": "model-00026-of-00082.safetensors", + "transformer.h.25.ln_2.weight": "model-00027-of-00082.safetensors", + "transformer.h.25.mlp.c_proj.weight": "model-00027-of-00082.safetensors", + "transformer.h.25.mlp.w1.weight": "model-00027-of-00082.safetensors", + "transformer.h.25.mlp.w2.weight": "model-00027-of-00082.safetensors", + "transformer.h.26.attn.c_attn.bias": "model-00028-of-00082.safetensors", + "transformer.h.26.attn.c_attn.weight": "model-00028-of-00082.safetensors", + "transformer.h.26.attn.c_proj.weight": "model-00028-of-00082.safetensors", + "transformer.h.26.ln_1.weight": "model-00027-of-00082.safetensors", + "transformer.h.26.ln_2.weight": "model-00028-of-00082.safetensors", + "transformer.h.26.mlp.c_proj.weight": "model-00028-of-00082.safetensors", + "transformer.h.26.mlp.w1.weight": "model-00028-of-00082.safetensors", + "transformer.h.26.mlp.w2.weight": "model-00028-of-00082.safetensors", + "transformer.h.27.attn.c_attn.bias": "model-00029-of-00082.safetensors", + "transformer.h.27.attn.c_attn.weight": "model-00029-of-00082.safetensors", + "transformer.h.27.attn.c_proj.weight": "model-00029-of-00082.safetensors", + "transformer.h.27.ln_1.weight": "model-00028-of-00082.safetensors", + "transformer.h.27.ln_2.weight": "model-00029-of-00082.safetensors", + "transformer.h.27.mlp.c_proj.weight": "model-00029-of-00082.safetensors", + "transformer.h.27.mlp.w1.weight": "model-00029-of-00082.safetensors", + "transformer.h.27.mlp.w2.weight": "model-00029-of-00082.safetensors", + "transformer.h.28.attn.c_attn.bias": "model-00030-of-00082.safetensors", + "transformer.h.28.attn.c_attn.weight": "model-00030-of-00082.safetensors", + "transformer.h.28.attn.c_proj.weight": "model-00030-of-00082.safetensors", + "transformer.h.28.ln_1.weight": "model-00029-of-00082.safetensors", + "transformer.h.28.ln_2.weight": "model-00030-of-00082.safetensors", + "transformer.h.28.mlp.c_proj.weight": "model-00030-of-00082.safetensors", + "transformer.h.28.mlp.w1.weight": "model-00030-of-00082.safetensors", + "transformer.h.28.mlp.w2.weight": "model-00030-of-00082.safetensors", + "transformer.h.29.attn.c_attn.bias": "model-00031-of-00082.safetensors", + "transformer.h.29.attn.c_attn.weight": "model-00031-of-00082.safetensors", + "transformer.h.29.attn.c_proj.weight": "model-00031-of-00082.safetensors", + "transformer.h.29.ln_1.weight": "model-00030-of-00082.safetensors", + "transformer.h.29.ln_2.weight": "model-00031-of-00082.safetensors", + "transformer.h.29.mlp.c_proj.weight": "model-00031-of-00082.safetensors", + "transformer.h.29.mlp.w1.weight": "model-00031-of-00082.safetensors", + "transformer.h.29.mlp.w2.weight": "model-00031-of-00082.safetensors", + "transformer.h.3.attn.c_attn.bias": "model-00005-of-00082.safetensors", + "transformer.h.3.attn.c_attn.weight": "model-00005-of-00082.safetensors", + "transformer.h.3.attn.c_proj.weight": "model-00005-of-00082.safetensors", + "transformer.h.3.ln_1.weight": "model-00004-of-00082.safetensors", + "transformer.h.3.ln_2.weight": "model-00005-of-00082.safetensors", + "transformer.h.3.mlp.c_proj.weight": "model-00005-of-00082.safetensors", + "transformer.h.3.mlp.w1.weight": "model-00005-of-00082.safetensors", + "transformer.h.3.mlp.w2.weight": "model-00005-of-00082.safetensors", + "transformer.h.30.attn.c_attn.bias": "model-00032-of-00082.safetensors", + "transformer.h.30.attn.c_attn.weight": "model-00032-of-00082.safetensors", + "transformer.h.30.attn.c_proj.weight": "model-00032-of-00082.safetensors", + "transformer.h.30.ln_1.weight": "model-00031-of-00082.safetensors", + "transformer.h.30.ln_2.weight": "model-00032-of-00082.safetensors", + "transformer.h.30.mlp.c_proj.weight": "model-00032-of-00082.safetensors", + "transformer.h.30.mlp.w1.weight": "model-00032-of-00082.safetensors", + "transformer.h.30.mlp.w2.weight": "model-00032-of-00082.safetensors", + "transformer.h.31.attn.c_attn.bias": "model-00033-of-00082.safetensors", + "transformer.h.31.attn.c_attn.weight": "model-00033-of-00082.safetensors", + "transformer.h.31.attn.c_proj.weight": "model-00033-of-00082.safetensors", + "transformer.h.31.ln_1.weight": "model-00032-of-00082.safetensors", + "transformer.h.31.ln_2.weight": "model-00033-of-00082.safetensors", + "transformer.h.31.mlp.c_proj.weight": "model-00033-of-00082.safetensors", + "transformer.h.31.mlp.w1.weight": "model-00033-of-00082.safetensors", + "transformer.h.31.mlp.w2.weight": "model-00033-of-00082.safetensors", + "transformer.h.32.attn.c_attn.bias": "model-00034-of-00082.safetensors", + "transformer.h.32.attn.c_attn.weight": "model-00034-of-00082.safetensors", + "transformer.h.32.attn.c_proj.weight": "model-00034-of-00082.safetensors", + "transformer.h.32.ln_1.weight": "model-00033-of-00082.safetensors", + "transformer.h.32.ln_2.weight": "model-00034-of-00082.safetensors", + "transformer.h.32.mlp.c_proj.weight": "model-00034-of-00082.safetensors", + "transformer.h.32.mlp.w1.weight": "model-00034-of-00082.safetensors", + "transformer.h.32.mlp.w2.weight": "model-00034-of-00082.safetensors", + "transformer.h.33.attn.c_attn.bias": "model-00035-of-00082.safetensors", + "transformer.h.33.attn.c_attn.weight": "model-00035-of-00082.safetensors", + "transformer.h.33.attn.c_proj.weight": "model-00035-of-00082.safetensors", + "transformer.h.33.ln_1.weight": "model-00034-of-00082.safetensors", + "transformer.h.33.ln_2.weight": "model-00035-of-00082.safetensors", + "transformer.h.33.mlp.c_proj.weight": "model-00035-of-00082.safetensors", + "transformer.h.33.mlp.w1.weight": "model-00035-of-00082.safetensors", + "transformer.h.33.mlp.w2.weight": "model-00035-of-00082.safetensors", + "transformer.h.34.attn.c_attn.bias": "model-00036-of-00082.safetensors", + "transformer.h.34.attn.c_attn.weight": "model-00036-of-00082.safetensors", + "transformer.h.34.attn.c_proj.weight": "model-00036-of-00082.safetensors", + "transformer.h.34.ln_1.weight": "model-00035-of-00082.safetensors", + "transformer.h.34.ln_2.weight": "model-00036-of-00082.safetensors", + "transformer.h.34.mlp.c_proj.weight": "model-00036-of-00082.safetensors", + "transformer.h.34.mlp.w1.weight": "model-00036-of-00082.safetensors", + "transformer.h.34.mlp.w2.weight": "model-00036-of-00082.safetensors", + "transformer.h.35.attn.c_attn.bias": "model-00037-of-00082.safetensors", + "transformer.h.35.attn.c_attn.weight": "model-00037-of-00082.safetensors", + "transformer.h.35.attn.c_proj.weight": "model-00037-of-00082.safetensors", + "transformer.h.35.ln_1.weight": "model-00036-of-00082.safetensors", + "transformer.h.35.ln_2.weight": "model-00037-of-00082.safetensors", + "transformer.h.35.mlp.c_proj.weight": "model-00037-of-00082.safetensors", + "transformer.h.35.mlp.w1.weight": "model-00037-of-00082.safetensors", + "transformer.h.35.mlp.w2.weight": "model-00037-of-00082.safetensors", + "transformer.h.36.attn.c_attn.bias": "model-00038-of-00082.safetensors", + "transformer.h.36.attn.c_attn.weight": "model-00038-of-00082.safetensors", + "transformer.h.36.attn.c_proj.weight": "model-00038-of-00082.safetensors", + "transformer.h.36.ln_1.weight": "model-00037-of-00082.safetensors", + "transformer.h.36.ln_2.weight": "model-00038-of-00082.safetensors", + "transformer.h.36.mlp.c_proj.weight": "model-00038-of-00082.safetensors", + "transformer.h.36.mlp.w1.weight": "model-00038-of-00082.safetensors", + "transformer.h.36.mlp.w2.weight": "model-00038-of-00082.safetensors", + "transformer.h.37.attn.c_attn.bias": "model-00039-of-00082.safetensors", + "transformer.h.37.attn.c_attn.weight": "model-00039-of-00082.safetensors", + "transformer.h.37.attn.c_proj.weight": "model-00039-of-00082.safetensors", + "transformer.h.37.ln_1.weight": "model-00038-of-00082.safetensors", + "transformer.h.37.ln_2.weight": "model-00039-of-00082.safetensors", + "transformer.h.37.mlp.c_proj.weight": "model-00039-of-00082.safetensors", + "transformer.h.37.mlp.w1.weight": "model-00039-of-00082.safetensors", + "transformer.h.37.mlp.w2.weight": "model-00039-of-00082.safetensors", + "transformer.h.38.attn.c_attn.bias": "model-00040-of-00082.safetensors", + "transformer.h.38.attn.c_attn.weight": "model-00040-of-00082.safetensors", + "transformer.h.38.attn.c_proj.weight": "model-00040-of-00082.safetensors", + "transformer.h.38.ln_1.weight": "model-00039-of-00082.safetensors", + "transformer.h.38.ln_2.weight": "model-00040-of-00082.safetensors", + "transformer.h.38.mlp.c_proj.weight": "model-00040-of-00082.safetensors", + "transformer.h.38.mlp.w1.weight": "model-00040-of-00082.safetensors", + "transformer.h.38.mlp.w2.weight": "model-00040-of-00082.safetensors", + "transformer.h.39.attn.c_attn.bias": "model-00041-of-00082.safetensors", + "transformer.h.39.attn.c_attn.weight": "model-00041-of-00082.safetensors", + "transformer.h.39.attn.c_proj.weight": "model-00041-of-00082.safetensors", + "transformer.h.39.ln_1.weight": "model-00040-of-00082.safetensors", + "transformer.h.39.ln_2.weight": "model-00041-of-00082.safetensors", + "transformer.h.39.mlp.c_proj.weight": "model-00041-of-00082.safetensors", + "transformer.h.39.mlp.w1.weight": "model-00041-of-00082.safetensors", + "transformer.h.39.mlp.w2.weight": "model-00041-of-00082.safetensors", + "transformer.h.4.attn.c_attn.bias": "model-00006-of-00082.safetensors", + "transformer.h.4.attn.c_attn.weight": "model-00006-of-00082.safetensors", + "transformer.h.4.attn.c_proj.weight": "model-00006-of-00082.safetensors", + "transformer.h.4.ln_1.weight": "model-00005-of-00082.safetensors", + "transformer.h.4.ln_2.weight": "model-00006-of-00082.safetensors", + "transformer.h.4.mlp.c_proj.weight": "model-00006-of-00082.safetensors", + "transformer.h.4.mlp.w1.weight": "model-00006-of-00082.safetensors", + "transformer.h.4.mlp.w2.weight": "model-00006-of-00082.safetensors", + "transformer.h.40.attn.c_attn.bias": "model-00042-of-00082.safetensors", + "transformer.h.40.attn.c_attn.weight": "model-00042-of-00082.safetensors", + "transformer.h.40.attn.c_proj.weight": "model-00042-of-00082.safetensors", + "transformer.h.40.ln_1.weight": "model-00041-of-00082.safetensors", + "transformer.h.40.ln_2.weight": "model-00042-of-00082.safetensors", + "transformer.h.40.mlp.c_proj.weight": "model-00042-of-00082.safetensors", + "transformer.h.40.mlp.w1.weight": "model-00042-of-00082.safetensors", + "transformer.h.40.mlp.w2.weight": "model-00042-of-00082.safetensors", + "transformer.h.41.attn.c_attn.bias": "model-00043-of-00082.safetensors", + "transformer.h.41.attn.c_attn.weight": "model-00043-of-00082.safetensors", + "transformer.h.41.attn.c_proj.weight": "model-00043-of-00082.safetensors", + "transformer.h.41.ln_1.weight": "model-00042-of-00082.safetensors", + "transformer.h.41.ln_2.weight": "model-00043-of-00082.safetensors", + "transformer.h.41.mlp.c_proj.weight": "model-00043-of-00082.safetensors", + "transformer.h.41.mlp.w1.weight": "model-00043-of-00082.safetensors", + "transformer.h.41.mlp.w2.weight": "model-00043-of-00082.safetensors", + "transformer.h.42.attn.c_attn.bias": "model-00044-of-00082.safetensors", + "transformer.h.42.attn.c_attn.weight": "model-00044-of-00082.safetensors", + "transformer.h.42.attn.c_proj.weight": "model-00044-of-00082.safetensors", + "transformer.h.42.ln_1.weight": "model-00043-of-00082.safetensors", + "transformer.h.42.ln_2.weight": "model-00044-of-00082.safetensors", + "transformer.h.42.mlp.c_proj.weight": "model-00044-of-00082.safetensors", + "transformer.h.42.mlp.w1.weight": "model-00044-of-00082.safetensors", + "transformer.h.42.mlp.w2.weight": "model-00044-of-00082.safetensors", + "transformer.h.43.attn.c_attn.bias": "model-00045-of-00082.safetensors", + "transformer.h.43.attn.c_attn.weight": "model-00045-of-00082.safetensors", + "transformer.h.43.attn.c_proj.weight": "model-00045-of-00082.safetensors", + "transformer.h.43.ln_1.weight": "model-00044-of-00082.safetensors", + "transformer.h.43.ln_2.weight": "model-00045-of-00082.safetensors", + "transformer.h.43.mlp.c_proj.weight": "model-00045-of-00082.safetensors", + "transformer.h.43.mlp.w1.weight": "model-00045-of-00082.safetensors", + "transformer.h.43.mlp.w2.weight": "model-00045-of-00082.safetensors", + "transformer.h.44.attn.c_attn.bias": "model-00046-of-00082.safetensors", + "transformer.h.44.attn.c_attn.weight": "model-00046-of-00082.safetensors", + "transformer.h.44.attn.c_proj.weight": "model-00046-of-00082.safetensors", + "transformer.h.44.ln_1.weight": "model-00045-of-00082.safetensors", + "transformer.h.44.ln_2.weight": "model-00046-of-00082.safetensors", + "transformer.h.44.mlp.c_proj.weight": "model-00046-of-00082.safetensors", + "transformer.h.44.mlp.w1.weight": "model-00046-of-00082.safetensors", + "transformer.h.44.mlp.w2.weight": "model-00046-of-00082.safetensors", + "transformer.h.45.attn.c_attn.bias": "model-00047-of-00082.safetensors", + "transformer.h.45.attn.c_attn.weight": "model-00047-of-00082.safetensors", + "transformer.h.45.attn.c_proj.weight": "model-00047-of-00082.safetensors", + "transformer.h.45.ln_1.weight": "model-00046-of-00082.safetensors", + "transformer.h.45.ln_2.weight": "model-00047-of-00082.safetensors", + "transformer.h.45.mlp.c_proj.weight": "model-00047-of-00082.safetensors", + "transformer.h.45.mlp.w1.weight": "model-00047-of-00082.safetensors", + "transformer.h.45.mlp.w2.weight": "model-00047-of-00082.safetensors", + "transformer.h.46.attn.c_attn.bias": "model-00048-of-00082.safetensors", + "transformer.h.46.attn.c_attn.weight": "model-00048-of-00082.safetensors", + "transformer.h.46.attn.c_proj.weight": "model-00048-of-00082.safetensors", + "transformer.h.46.ln_1.weight": "model-00047-of-00082.safetensors", + "transformer.h.46.ln_2.weight": "model-00048-of-00082.safetensors", + "transformer.h.46.mlp.c_proj.weight": "model-00048-of-00082.safetensors", + "transformer.h.46.mlp.w1.weight": "model-00048-of-00082.safetensors", + "transformer.h.46.mlp.w2.weight": "model-00048-of-00082.safetensors", + "transformer.h.47.attn.c_attn.bias": "model-00049-of-00082.safetensors", + "transformer.h.47.attn.c_attn.weight": "model-00049-of-00082.safetensors", + "transformer.h.47.attn.c_proj.weight": "model-00049-of-00082.safetensors", + "transformer.h.47.ln_1.weight": "model-00048-of-00082.safetensors", + "transformer.h.47.ln_2.weight": "model-00049-of-00082.safetensors", + "transformer.h.47.mlp.c_proj.weight": "model-00049-of-00082.safetensors", + "transformer.h.47.mlp.w1.weight": "model-00049-of-00082.safetensors", + "transformer.h.47.mlp.w2.weight": "model-00049-of-00082.safetensors", + "transformer.h.48.attn.c_attn.bias": "model-00050-of-00082.safetensors", + "transformer.h.48.attn.c_attn.weight": "model-00050-of-00082.safetensors", + "transformer.h.48.attn.c_proj.weight": "model-00050-of-00082.safetensors", + "transformer.h.48.ln_1.weight": "model-00049-of-00082.safetensors", + "transformer.h.48.ln_2.weight": "model-00050-of-00082.safetensors", + "transformer.h.48.mlp.c_proj.weight": "model-00050-of-00082.safetensors", + "transformer.h.48.mlp.w1.weight": "model-00050-of-00082.safetensors", + "transformer.h.48.mlp.w2.weight": "model-00050-of-00082.safetensors", + "transformer.h.49.attn.c_attn.bias": "model-00051-of-00082.safetensors", + "transformer.h.49.attn.c_attn.weight": "model-00051-of-00082.safetensors", + "transformer.h.49.attn.c_proj.weight": "model-00051-of-00082.safetensors", + "transformer.h.49.ln_1.weight": "model-00050-of-00082.safetensors", + "transformer.h.49.ln_2.weight": "model-00051-of-00082.safetensors", + "transformer.h.49.mlp.c_proj.weight": "model-00051-of-00082.safetensors", + "transformer.h.49.mlp.w1.weight": "model-00051-of-00082.safetensors", + "transformer.h.49.mlp.w2.weight": "model-00051-of-00082.safetensors", + "transformer.h.5.attn.c_attn.bias": "model-00007-of-00082.safetensors", + "transformer.h.5.attn.c_attn.weight": "model-00007-of-00082.safetensors", + "transformer.h.5.attn.c_proj.weight": "model-00007-of-00082.safetensors", + "transformer.h.5.ln_1.weight": "model-00006-of-00082.safetensors", + "transformer.h.5.ln_2.weight": "model-00007-of-00082.safetensors", + "transformer.h.5.mlp.c_proj.weight": "model-00007-of-00082.safetensors", + "transformer.h.5.mlp.w1.weight": "model-00007-of-00082.safetensors", + "transformer.h.5.mlp.w2.weight": "model-00007-of-00082.safetensors", + "transformer.h.50.attn.c_attn.bias": "model-00052-of-00082.safetensors", + "transformer.h.50.attn.c_attn.weight": "model-00052-of-00082.safetensors", + "transformer.h.50.attn.c_proj.weight": "model-00052-of-00082.safetensors", + "transformer.h.50.ln_1.weight": "model-00051-of-00082.safetensors", + "transformer.h.50.ln_2.weight": "model-00052-of-00082.safetensors", + "transformer.h.50.mlp.c_proj.weight": "model-00052-of-00082.safetensors", + "transformer.h.50.mlp.w1.weight": "model-00052-of-00082.safetensors", + "transformer.h.50.mlp.w2.weight": "model-00052-of-00082.safetensors", + "transformer.h.51.attn.c_attn.bias": "model-00053-of-00082.safetensors", + "transformer.h.51.attn.c_attn.weight": "model-00053-of-00082.safetensors", + "transformer.h.51.attn.c_proj.weight": "model-00053-of-00082.safetensors", + "transformer.h.51.ln_1.weight": "model-00052-of-00082.safetensors", + "transformer.h.51.ln_2.weight": "model-00053-of-00082.safetensors", + "transformer.h.51.mlp.c_proj.weight": "model-00053-of-00082.safetensors", + "transformer.h.51.mlp.w1.weight": "model-00053-of-00082.safetensors", + "transformer.h.51.mlp.w2.weight": "model-00053-of-00082.safetensors", + "transformer.h.52.attn.c_attn.bias": "model-00054-of-00082.safetensors", + "transformer.h.52.attn.c_attn.weight": "model-00054-of-00082.safetensors", + "transformer.h.52.attn.c_proj.weight": "model-00054-of-00082.safetensors", + "transformer.h.52.ln_1.weight": "model-00053-of-00082.safetensors", + "transformer.h.52.ln_2.weight": "model-00054-of-00082.safetensors", + "transformer.h.52.mlp.c_proj.weight": "model-00054-of-00082.safetensors", + "transformer.h.52.mlp.w1.weight": "model-00054-of-00082.safetensors", + "transformer.h.52.mlp.w2.weight": "model-00054-of-00082.safetensors", + "transformer.h.53.attn.c_attn.bias": "model-00055-of-00082.safetensors", + "transformer.h.53.attn.c_attn.weight": "model-00055-of-00082.safetensors", + "transformer.h.53.attn.c_proj.weight": "model-00055-of-00082.safetensors", + "transformer.h.53.ln_1.weight": "model-00054-of-00082.safetensors", + "transformer.h.53.ln_2.weight": "model-00055-of-00082.safetensors", + "transformer.h.53.mlp.c_proj.weight": "model-00055-of-00082.safetensors", + "transformer.h.53.mlp.w1.weight": "model-00055-of-00082.safetensors", + "transformer.h.53.mlp.w2.weight": "model-00055-of-00082.safetensors", + "transformer.h.54.attn.c_attn.bias": "model-00056-of-00082.safetensors", + "transformer.h.54.attn.c_attn.weight": "model-00056-of-00082.safetensors", + "transformer.h.54.attn.c_proj.weight": "model-00056-of-00082.safetensors", + "transformer.h.54.ln_1.weight": "model-00055-of-00082.safetensors", + "transformer.h.54.ln_2.weight": "model-00056-of-00082.safetensors", + "transformer.h.54.mlp.c_proj.weight": "model-00056-of-00082.safetensors", + "transformer.h.54.mlp.w1.weight": "model-00056-of-00082.safetensors", + "transformer.h.54.mlp.w2.weight": "model-00056-of-00082.safetensors", + "transformer.h.55.attn.c_attn.bias": "model-00057-of-00082.safetensors", + "transformer.h.55.attn.c_attn.weight": "model-00057-of-00082.safetensors", + "transformer.h.55.attn.c_proj.weight": "model-00057-of-00082.safetensors", + "transformer.h.55.ln_1.weight": "model-00056-of-00082.safetensors", + "transformer.h.55.ln_2.weight": "model-00057-of-00082.safetensors", + "transformer.h.55.mlp.c_proj.weight": "model-00057-of-00082.safetensors", + "transformer.h.55.mlp.w1.weight": "model-00057-of-00082.safetensors", + "transformer.h.55.mlp.w2.weight": "model-00057-of-00082.safetensors", + "transformer.h.56.attn.c_attn.bias": "model-00058-of-00082.safetensors", + "transformer.h.56.attn.c_attn.weight": "model-00058-of-00082.safetensors", + "transformer.h.56.attn.c_proj.weight": "model-00058-of-00082.safetensors", + "transformer.h.56.ln_1.weight": "model-00057-of-00082.safetensors", + "transformer.h.56.ln_2.weight": "model-00058-of-00082.safetensors", + "transformer.h.56.mlp.c_proj.weight": "model-00058-of-00082.safetensors", + "transformer.h.56.mlp.w1.weight": "model-00058-of-00082.safetensors", + "transformer.h.56.mlp.w2.weight": "model-00058-of-00082.safetensors", + "transformer.h.57.attn.c_attn.bias": "model-00059-of-00082.safetensors", + "transformer.h.57.attn.c_attn.weight": "model-00059-of-00082.safetensors", + "transformer.h.57.attn.c_proj.weight": "model-00059-of-00082.safetensors", + "transformer.h.57.ln_1.weight": "model-00058-of-00082.safetensors", + "transformer.h.57.ln_2.weight": "model-00059-of-00082.safetensors", + "transformer.h.57.mlp.c_proj.weight": "model-00059-of-00082.safetensors", + "transformer.h.57.mlp.w1.weight": "model-00059-of-00082.safetensors", + "transformer.h.57.mlp.w2.weight": "model-00059-of-00082.safetensors", + "transformer.h.58.attn.c_attn.bias": "model-00060-of-00082.safetensors", + "transformer.h.58.attn.c_attn.weight": "model-00060-of-00082.safetensors", + "transformer.h.58.attn.c_proj.weight": "model-00060-of-00082.safetensors", + "transformer.h.58.ln_1.weight": "model-00059-of-00082.safetensors", + "transformer.h.58.ln_2.weight": "model-00060-of-00082.safetensors", + "transformer.h.58.mlp.c_proj.weight": "model-00060-of-00082.safetensors", + "transformer.h.58.mlp.w1.weight": "model-00060-of-00082.safetensors", + "transformer.h.58.mlp.w2.weight": "model-00060-of-00082.safetensors", + "transformer.h.59.attn.c_attn.bias": "model-00061-of-00082.safetensors", + "transformer.h.59.attn.c_attn.weight": "model-00061-of-00082.safetensors", + "transformer.h.59.attn.c_proj.weight": "model-00061-of-00082.safetensors", + "transformer.h.59.ln_1.weight": "model-00060-of-00082.safetensors", + "transformer.h.59.ln_2.weight": "model-00061-of-00082.safetensors", + "transformer.h.59.mlp.c_proj.weight": "model-00061-of-00082.safetensors", + "transformer.h.59.mlp.w1.weight": "model-00061-of-00082.safetensors", + "transformer.h.59.mlp.w2.weight": "model-00061-of-00082.safetensors", + "transformer.h.6.attn.c_attn.bias": "model-00008-of-00082.safetensors", + "transformer.h.6.attn.c_attn.weight": "model-00008-of-00082.safetensors", + "transformer.h.6.attn.c_proj.weight": "model-00008-of-00082.safetensors", + "transformer.h.6.ln_1.weight": "model-00007-of-00082.safetensors", + "transformer.h.6.ln_2.weight": "model-00008-of-00082.safetensors", + "transformer.h.6.mlp.c_proj.weight": "model-00008-of-00082.safetensors", + "transformer.h.6.mlp.w1.weight": "model-00008-of-00082.safetensors", + "transformer.h.6.mlp.w2.weight": "model-00008-of-00082.safetensors", + "transformer.h.60.attn.c_attn.bias": "model-00062-of-00082.safetensors", + "transformer.h.60.attn.c_attn.weight": "model-00062-of-00082.safetensors", + "transformer.h.60.attn.c_proj.weight": "model-00062-of-00082.safetensors", + "transformer.h.60.ln_1.weight": "model-00061-of-00082.safetensors", + "transformer.h.60.ln_2.weight": "model-00062-of-00082.safetensors", + "transformer.h.60.mlp.c_proj.weight": "model-00062-of-00082.safetensors", + "transformer.h.60.mlp.w1.weight": "model-00062-of-00082.safetensors", + "transformer.h.60.mlp.w2.weight": "model-00062-of-00082.safetensors", + "transformer.h.61.attn.c_attn.bias": "model-00063-of-00082.safetensors", + "transformer.h.61.attn.c_attn.weight": "model-00063-of-00082.safetensors", + "transformer.h.61.attn.c_proj.weight": "model-00063-of-00082.safetensors", + "transformer.h.61.ln_1.weight": "model-00062-of-00082.safetensors", + "transformer.h.61.ln_2.weight": "model-00063-of-00082.safetensors", + "transformer.h.61.mlp.c_proj.weight": "model-00063-of-00082.safetensors", + "transformer.h.61.mlp.w1.weight": "model-00063-of-00082.safetensors", + "transformer.h.61.mlp.w2.weight": "model-00063-of-00082.safetensors", + "transformer.h.62.attn.c_attn.bias": "model-00064-of-00082.safetensors", + "transformer.h.62.attn.c_attn.weight": "model-00064-of-00082.safetensors", + "transformer.h.62.attn.c_proj.weight": "model-00064-of-00082.safetensors", + "transformer.h.62.ln_1.weight": "model-00063-of-00082.safetensors", + "transformer.h.62.ln_2.weight": "model-00064-of-00082.safetensors", + "transformer.h.62.mlp.c_proj.weight": "model-00064-of-00082.safetensors", + "transformer.h.62.mlp.w1.weight": "model-00064-of-00082.safetensors", + "transformer.h.62.mlp.w2.weight": "model-00064-of-00082.safetensors", + "transformer.h.63.attn.c_attn.bias": "model-00065-of-00082.safetensors", + "transformer.h.63.attn.c_attn.weight": "model-00065-of-00082.safetensors", + "transformer.h.63.attn.c_proj.weight": "model-00065-of-00082.safetensors", + "transformer.h.63.ln_1.weight": "model-00064-of-00082.safetensors", + "transformer.h.63.ln_2.weight": "model-00065-of-00082.safetensors", + "transformer.h.63.mlp.c_proj.weight": "model-00065-of-00082.safetensors", + "transformer.h.63.mlp.w1.weight": "model-00065-of-00082.safetensors", + "transformer.h.63.mlp.w2.weight": "model-00065-of-00082.safetensors", + "transformer.h.64.attn.c_attn.bias": "model-00066-of-00082.safetensors", + "transformer.h.64.attn.c_attn.weight": "model-00066-of-00082.safetensors", + "transformer.h.64.attn.c_proj.weight": "model-00066-of-00082.safetensors", + "transformer.h.64.ln_1.weight": "model-00065-of-00082.safetensors", + "transformer.h.64.ln_2.weight": "model-00066-of-00082.safetensors", + "transformer.h.64.mlp.c_proj.weight": "model-00066-of-00082.safetensors", + "transformer.h.64.mlp.w1.weight": "model-00066-of-00082.safetensors", + "transformer.h.64.mlp.w2.weight": "model-00066-of-00082.safetensors", + "transformer.h.65.attn.c_attn.bias": "model-00067-of-00082.safetensors", + "transformer.h.65.attn.c_attn.weight": "model-00067-of-00082.safetensors", + "transformer.h.65.attn.c_proj.weight": "model-00067-of-00082.safetensors", + "transformer.h.65.ln_1.weight": "model-00066-of-00082.safetensors", + "transformer.h.65.ln_2.weight": "model-00067-of-00082.safetensors", + "transformer.h.65.mlp.c_proj.weight": "model-00067-of-00082.safetensors", + "transformer.h.65.mlp.w1.weight": "model-00067-of-00082.safetensors", + "transformer.h.65.mlp.w2.weight": "model-00067-of-00082.safetensors", + "transformer.h.66.attn.c_attn.bias": "model-00068-of-00082.safetensors", + "transformer.h.66.attn.c_attn.weight": "model-00068-of-00082.safetensors", + "transformer.h.66.attn.c_proj.weight": "model-00068-of-00082.safetensors", + "transformer.h.66.ln_1.weight": "model-00067-of-00082.safetensors", + "transformer.h.66.ln_2.weight": "model-00068-of-00082.safetensors", + "transformer.h.66.mlp.c_proj.weight": "model-00068-of-00082.safetensors", + "transformer.h.66.mlp.w1.weight": "model-00068-of-00082.safetensors", + "transformer.h.66.mlp.w2.weight": "model-00068-of-00082.safetensors", + "transformer.h.67.attn.c_attn.bias": "model-00069-of-00082.safetensors", + "transformer.h.67.attn.c_attn.weight": "model-00069-of-00082.safetensors", + "transformer.h.67.attn.c_proj.weight": "model-00069-of-00082.safetensors", + "transformer.h.67.ln_1.weight": "model-00068-of-00082.safetensors", + "transformer.h.67.ln_2.weight": "model-00069-of-00082.safetensors", + "transformer.h.67.mlp.c_proj.weight": "model-00069-of-00082.safetensors", + "transformer.h.67.mlp.w1.weight": "model-00069-of-00082.safetensors", + "transformer.h.67.mlp.w2.weight": "model-00069-of-00082.safetensors", + "transformer.h.68.attn.c_attn.bias": "model-00070-of-00082.safetensors", + "transformer.h.68.attn.c_attn.weight": "model-00070-of-00082.safetensors", + "transformer.h.68.attn.c_proj.weight": "model-00070-of-00082.safetensors", + "transformer.h.68.ln_1.weight": "model-00069-of-00082.safetensors", + "transformer.h.68.ln_2.weight": "model-00070-of-00082.safetensors", + "transformer.h.68.mlp.c_proj.weight": "model-00070-of-00082.safetensors", + "transformer.h.68.mlp.w1.weight": "model-00070-of-00082.safetensors", + "transformer.h.68.mlp.w2.weight": "model-00070-of-00082.safetensors", + "transformer.h.69.attn.c_attn.bias": "model-00071-of-00082.safetensors", + "transformer.h.69.attn.c_attn.weight": "model-00071-of-00082.safetensors", + "transformer.h.69.attn.c_proj.weight": "model-00071-of-00082.safetensors", + "transformer.h.69.ln_1.weight": "model-00070-of-00082.safetensors", + "transformer.h.69.ln_2.weight": "model-00071-of-00082.safetensors", + "transformer.h.69.mlp.c_proj.weight": "model-00071-of-00082.safetensors", + "transformer.h.69.mlp.w1.weight": "model-00071-of-00082.safetensors", + "transformer.h.69.mlp.w2.weight": "model-00071-of-00082.safetensors", + "transformer.h.7.attn.c_attn.bias": "model-00009-of-00082.safetensors", + "transformer.h.7.attn.c_attn.weight": "model-00009-of-00082.safetensors", + "transformer.h.7.attn.c_proj.weight": "model-00009-of-00082.safetensors", + "transformer.h.7.ln_1.weight": "model-00008-of-00082.safetensors", + "transformer.h.7.ln_2.weight": "model-00009-of-00082.safetensors", + "transformer.h.7.mlp.c_proj.weight": "model-00009-of-00082.safetensors", + "transformer.h.7.mlp.w1.weight": "model-00009-of-00082.safetensors", + "transformer.h.7.mlp.w2.weight": "model-00009-of-00082.safetensors", + "transformer.h.70.attn.c_attn.bias": "model-00072-of-00082.safetensors", + "transformer.h.70.attn.c_attn.weight": "model-00072-of-00082.safetensors", + "transformer.h.70.attn.c_proj.weight": "model-00072-of-00082.safetensors", + "transformer.h.70.ln_1.weight": "model-00071-of-00082.safetensors", + "transformer.h.70.ln_2.weight": "model-00072-of-00082.safetensors", + "transformer.h.70.mlp.c_proj.weight": "model-00072-of-00082.safetensors", + "transformer.h.70.mlp.w1.weight": "model-00072-of-00082.safetensors", + "transformer.h.70.mlp.w2.weight": "model-00072-of-00082.safetensors", + "transformer.h.71.attn.c_attn.bias": "model-00073-of-00082.safetensors", + "transformer.h.71.attn.c_attn.weight": "model-00073-of-00082.safetensors", + "transformer.h.71.attn.c_proj.weight": "model-00073-of-00082.safetensors", + "transformer.h.71.ln_1.weight": "model-00072-of-00082.safetensors", + "transformer.h.71.ln_2.weight": "model-00073-of-00082.safetensors", + "transformer.h.71.mlp.c_proj.weight": "model-00073-of-00082.safetensors", + "transformer.h.71.mlp.w1.weight": "model-00073-of-00082.safetensors", + "transformer.h.71.mlp.w2.weight": "model-00073-of-00082.safetensors", + "transformer.h.72.attn.c_attn.bias": "model-00074-of-00082.safetensors", + "transformer.h.72.attn.c_attn.weight": "model-00074-of-00082.safetensors", + "transformer.h.72.attn.c_proj.weight": "model-00074-of-00082.safetensors", + "transformer.h.72.ln_1.weight": "model-00073-of-00082.safetensors", + "transformer.h.72.ln_2.weight": "model-00074-of-00082.safetensors", + "transformer.h.72.mlp.c_proj.weight": "model-00074-of-00082.safetensors", + "transformer.h.72.mlp.w1.weight": "model-00074-of-00082.safetensors", + "transformer.h.72.mlp.w2.weight": "model-00074-of-00082.safetensors", + "transformer.h.73.attn.c_attn.bias": "model-00075-of-00082.safetensors", + "transformer.h.73.attn.c_attn.weight": "model-00075-of-00082.safetensors", + "transformer.h.73.attn.c_proj.weight": "model-00075-of-00082.safetensors", + "transformer.h.73.ln_1.weight": "model-00074-of-00082.safetensors", + "transformer.h.73.ln_2.weight": "model-00075-of-00082.safetensors", + "transformer.h.73.mlp.c_proj.weight": "model-00075-of-00082.safetensors", + "transformer.h.73.mlp.w1.weight": "model-00075-of-00082.safetensors", + "transformer.h.73.mlp.w2.weight": "model-00075-of-00082.safetensors", + "transformer.h.74.attn.c_attn.bias": "model-00076-of-00082.safetensors", + "transformer.h.74.attn.c_attn.weight": "model-00076-of-00082.safetensors", + "transformer.h.74.attn.c_proj.weight": "model-00076-of-00082.safetensors", + "transformer.h.74.ln_1.weight": "model-00075-of-00082.safetensors", + "transformer.h.74.ln_2.weight": "model-00076-of-00082.safetensors", + "transformer.h.74.mlp.c_proj.weight": "model-00076-of-00082.safetensors", + "transformer.h.74.mlp.w1.weight": "model-00076-of-00082.safetensors", + "transformer.h.74.mlp.w2.weight": "model-00076-of-00082.safetensors", + "transformer.h.75.attn.c_attn.bias": "model-00077-of-00082.safetensors", + "transformer.h.75.attn.c_attn.weight": "model-00077-of-00082.safetensors", + "transformer.h.75.attn.c_proj.weight": "model-00077-of-00082.safetensors", + "transformer.h.75.ln_1.weight": "model-00076-of-00082.safetensors", + "transformer.h.75.ln_2.weight": "model-00077-of-00082.safetensors", + "transformer.h.75.mlp.c_proj.weight": "model-00077-of-00082.safetensors", + "transformer.h.75.mlp.w1.weight": "model-00077-of-00082.safetensors", + "transformer.h.75.mlp.w2.weight": "model-00077-of-00082.safetensors", + "transformer.h.76.attn.c_attn.bias": "model-00078-of-00082.safetensors", + "transformer.h.76.attn.c_attn.weight": "model-00078-of-00082.safetensors", + "transformer.h.76.attn.c_proj.weight": "model-00078-of-00082.safetensors", + "transformer.h.76.ln_1.weight": "model-00077-of-00082.safetensors", + "transformer.h.76.ln_2.weight": "model-00078-of-00082.safetensors", + "transformer.h.76.mlp.c_proj.weight": "model-00078-of-00082.safetensors", + "transformer.h.76.mlp.w1.weight": "model-00078-of-00082.safetensors", + "transformer.h.76.mlp.w2.weight": "model-00078-of-00082.safetensors", + "transformer.h.77.attn.c_attn.bias": "model-00079-of-00082.safetensors", + "transformer.h.77.attn.c_attn.weight": "model-00079-of-00082.safetensors", + "transformer.h.77.attn.c_proj.weight": "model-00079-of-00082.safetensors", + "transformer.h.77.ln_1.weight": "model-00078-of-00082.safetensors", + "transformer.h.77.ln_2.weight": "model-00079-of-00082.safetensors", + "transformer.h.77.mlp.c_proj.weight": "model-00079-of-00082.safetensors", + "transformer.h.77.mlp.w1.weight": "model-00079-of-00082.safetensors", + "transformer.h.77.mlp.w2.weight": "model-00079-of-00082.safetensors", + "transformer.h.78.attn.c_attn.bias": "model-00080-of-00082.safetensors", + "transformer.h.78.attn.c_attn.weight": "model-00080-of-00082.safetensors", + "transformer.h.78.attn.c_proj.weight": "model-00080-of-00082.safetensors", + "transformer.h.78.ln_1.weight": "model-00079-of-00082.safetensors", + "transformer.h.78.ln_2.weight": "model-00080-of-00082.safetensors", + "transformer.h.78.mlp.c_proj.weight": "model-00080-of-00082.safetensors", + "transformer.h.78.mlp.w1.weight": "model-00080-of-00082.safetensors", + "transformer.h.78.mlp.w2.weight": "model-00080-of-00082.safetensors", + "transformer.h.79.attn.c_attn.bias": "model-00081-of-00082.safetensors", + "transformer.h.79.attn.c_attn.weight": "model-00081-of-00082.safetensors", + "transformer.h.79.attn.c_proj.weight": "model-00081-of-00082.safetensors", + "transformer.h.79.ln_1.weight": "model-00080-of-00082.safetensors", + "transformer.h.79.ln_2.weight": "model-00081-of-00082.safetensors", + "transformer.h.79.mlp.c_proj.weight": "model-00081-of-00082.safetensors", + "transformer.h.79.mlp.w1.weight": "model-00081-of-00082.safetensors", + "transformer.h.79.mlp.w2.weight": "model-00081-of-00082.safetensors", + "transformer.h.8.attn.c_attn.bias": "model-00010-of-00082.safetensors", + "transformer.h.8.attn.c_attn.weight": "model-00010-of-00082.safetensors", + "transformer.h.8.attn.c_proj.weight": "model-00010-of-00082.safetensors", + "transformer.h.8.ln_1.weight": "model-00009-of-00082.safetensors", + "transformer.h.8.ln_2.weight": "model-00010-of-00082.safetensors", + "transformer.h.8.mlp.c_proj.weight": "model-00010-of-00082.safetensors", + "transformer.h.8.mlp.w1.weight": "model-00010-of-00082.safetensors", + "transformer.h.8.mlp.w2.weight": "model-00010-of-00082.safetensors", + "transformer.h.9.attn.c_attn.bias": "model-00011-of-00082.safetensors", + "transformer.h.9.attn.c_attn.weight": "model-00011-of-00082.safetensors", + "transformer.h.9.attn.c_proj.weight": "model-00011-of-00082.safetensors", + "transformer.h.9.ln_1.weight": "model-00010-of-00082.safetensors", + "transformer.h.9.ln_2.weight": "model-00011-of-00082.safetensors", + "transformer.h.9.mlp.c_proj.weight": "model-00011-of-00082.safetensors", + "transformer.h.9.mlp.w1.weight": "model-00011-of-00082.safetensors", + "transformer.h.9.mlp.w2.weight": "model-00011-of-00082.safetensors", + "transformer.ln_f.weight": "model-00081-of-00082.safetensors", + "transformer.wte.weight": "model-00001-of-00082.safetensors" } } diff --git a/modeling_qwen.py b/modeling_qwen.py index 650691845e6a69f53031dabbdd618d0b6961affe..646ba426deaa808070d2cd7f3a40778295c67b21 100644 --- a/modeling_qwen.py +++ b/modeling_qwen.py @@ -3,14 +3,16 @@ # This source code is licensed under the license found in the # LICENSE file in the root directory of this source tree. +import copy import importlib import math +import pathlib from typing import TYPE_CHECKING, Optional, Tuple, Union, Callable, List, Any, Generator import torch import torch.nn.functional as F import torch.utils.checkpoint -from torch.cuda.amp import autocast +import warnings from torch.nn import CrossEntropyLoss from transformers import PreTrainedTokenizer, GenerationConfig, StoppingCriteriaList @@ -76,9 +78,10 @@ We detect you have activated flash attention support, but running model computat apply_rotary_emb_func = None rms_norm = None flash_attn_unpadded_func = None +flash_attn_func = None def _import_flash_attn(): - global apply_rotary_emb_func, rms_norm, flash_attn_unpadded_func + global apply_rotary_emb_func, rms_norm, flash_attn_unpadded_func, flash_attn_func try: from flash_attn.layers.rotary import apply_rotary_emb_func as __apply_rotary_emb_func apply_rotary_emb_func = __apply_rotary_emb_func @@ -99,14 +102,18 @@ def _import_flash_attn(): try: import flash_attn + _flash_attn_func = None if not hasattr(flash_attn, '__version__'): from flash_attn.flash_attn_interface import flash_attn_unpadded_func as __flash_attn_unpadded_func else: if int(flash_attn.__version__.split(".")[0]) >= 2: + if int(flash_attn.__version__.split(".")[1]) >= 1: + from flash_attn.flash_attn_interface import flash_attn_func as _flash_attn_func from flash_attn.flash_attn_interface import flash_attn_varlen_func as __flash_attn_unpadded_func else: from flash_attn.flash_attn_interface import flash_attn_unpadded_func as __flash_attn_unpadded_func flash_attn_unpadded_func = __flash_attn_unpadded_func + flash_attn_func = _flash_attn_func except ImportError: logger.warn( "Warning: import flash_attn fail, please install FlashAttention to get higher efficiency " @@ -179,6 +186,11 @@ class FlashSelfAttention(torch.nn.Module): seqlen_k = k.shape[1] seqlen_out = seqlen_q + if flash_attn_func is not None and batch_size == 1: + dropout_p = self.dropout_p if self.training else 0 + output = flash_attn_func(q, k, v, dropout_p, softmax_scale=self.softmax_scale, causal=self.causal) + return output + q, k, v = [rearrange(x, "b s ... -> (b s) ...") for x in [q, k, v]] cu_seqlens_q = torch.arange( 0, @@ -294,13 +306,21 @@ class QWenAttention(nn.Module): self.cache_qmin = torch.tensor(torch.iinfo(torch.uint8).min, dtype=cache_dtype) if config.use_cache_quantization and config.use_cache_kernel: - from .cpp_kernels import cache_autogptq_cuda_256 - try: - self.cache_kernels = cache_autogptq_cuda_256 - except ImportError: + # pre check if the support files existing + module_root = pathlib.Path(__file__).parent + src_files = ("cache_autogptq_cuda_256.cpp", "cache_autogptq_cuda_kernel_256.cu") + if any(not (module_root/src).is_file() for src in src_files): + warnings.warn("KV cache kernel source files (.cpp and .cu) not found.") self.cache_kernels = None - - def _attn(self, query, key, value, registered_causal_mask, attention_mask=None, head_mask=None): + else: + try: + from .cpp_kernels import cache_autogptq_cuda_256 + self.cache_kernels = cache_autogptq_cuda_256 + except ImportError: + warnings.warn("Failed to import KV cache kernels.") + self.cache_kernels = None + + def _attn(self, query, key, value, causal_mask=None, attention_mask=None, head_mask=None): device = query.device if self.use_cache_quantization: qk, qk_scale, qk_zero = key @@ -325,26 +345,13 @@ class QWenAttention(nn.Module): size_temp = value[0].size(-1) else: size_temp = value.size(-1) - attn_weights = attn_weights / torch.full( - [], - size_temp ** 0.5, - dtype=attn_weights.dtype, - device=attn_weights.device, - ) - if self.use_cache_quantization: - query_length, key_length = query.size(-2), key[0].size(-2) - else: - query_length, key_length = query.size(-2), key.size(-2) - causal_mask = registered_causal_mask[ - :, :, key_length - query_length : key_length, :key_length - ] + attn_weights = attn_weights / (size_temp ** 0.5) + mask_value = torch.finfo(attn_weights.dtype).min - mask_value = torch.full([], mask_value, dtype=attn_weights.dtype).to( - attn_weights.device - ) - attn_weights = torch.where( - causal_mask, attn_weights.to(attn_weights.dtype), mask_value - ) + if causal_mask is not None: + attn_weights = torch.where( + causal_mask, attn_weights.to(attn_weights.dtype), mask_value + ) if attention_mask is not None: attn_weights = attn_weights + attention_mask @@ -384,62 +391,6 @@ class QWenAttention(nn.Module): return attn_output, attn_weights - def _upcast_and_reordered_attn( - self, query, key, value, registered_causal_mask, attention_mask=None, head_mask=None - ): - bsz, num_heads, q_seq_len, dk = query.size() - _, _, k_seq_len, _ = key.size() - - attn_weights = torch.empty( - bsz * num_heads, - q_seq_len, - k_seq_len, - dtype=torch.float32, - device=query.device, - ) - - scale_factor = 1.0 - if self.scale_attn_weights: - scale_factor /= float(value.size(-1)) ** 0.5 - - with autocast(enabled=False): - q, k = query.reshape(-1, q_seq_len, dk), key.transpose(-1, -2).reshape( - -1, dk, k_seq_len - ) - attn_weights = torch.baddbmm( - attn_weights, q.float(), k.float(), beta=0, alpha=scale_factor - ) - attn_weights = attn_weights.reshape(bsz, num_heads, q_seq_len, k_seq_len) - - query_length, key_length = query.size(-2), key.size(-2) - causal_mask = registered_causal_mask[ - :, :, key_length - query_length : key_length, :key_length - ] - mask_value = torch.finfo(attn_weights.dtype).min - mask_value = torch.tensor(mask_value, dtype=attn_weights.dtype).to( - attn_weights.device - ) - attn_weights = torch.where(causal_mask, attn_weights, mask_value) - - if attention_mask is not None: - attn_weights = attn_weights + attention_mask - - attn_weights = nn.functional.softmax(attn_weights, dim=-1) - - if attn_weights.dtype != torch.float32: - raise RuntimeError( - "Error with upcasting, attn_weights does not have dtype torch.float32" - ) - attn_weights = attn_weights.type(value.dtype) - attn_weights = self.attn_dropout(attn_weights) - - if head_mask is not None: - attn_weights = attn_weights * head_mask - - attn_output = torch.matmul(attn_weights, value) - - return attn_output, attn_weights - def _split_heads(self, tensor, num_heads, attn_head_size): new_shape = tensor.size()[:-1] + (num_heads, attn_head_size) tensor = tensor.view(new_shape) @@ -454,7 +405,6 @@ class QWenAttention(nn.Module): self, hidden_states: Optional[Tuple[torch.FloatTensor]], rotary_pos_emb_list: Optional[List[List[torch.Tensor]]] = None, - registered_causal_mask: Optional[torch.Tensor] = None, layer_past: Optional[Tuple[torch.Tensor]] = None, attention_mask: Optional[torch.FloatTensor] = None, head_mask: Optional[torch.FloatTensor] = None, @@ -528,14 +478,15 @@ class QWenAttention(nn.Module): else: present = None - if self.use_logn_attn and not self.training: + key_size = key[0].size(2) if self.use_cache_quantization else key.size(1) + if key_size > self.seq_length and self.use_logn_attn and not self.training: if self.use_cache_quantization: seq_start = key[0].size(2) - query.size(1) seq_end = key[0].size(2) else: seq_start = key.size(1) - query.size(1) seq_end = key.size(1) - logn_tensor = self.logn_tensor[:, seq_start:seq_end, :, :] + logn_tensor = self.logn_tensor[:, seq_start:seq_end, :, :].type_as(query) query = query * logn_tensor.expand_as(query) if ( @@ -547,12 +498,19 @@ class QWenAttention(nn.Module): q, k, v = query, key, value attn_output = self.core_attention_flash(q, k, v, attention_mask=attention_mask) else: + key_size = key[0].size(2) if self.use_cache_quantization else key.size(1) + if query.size(1) == key_size: + causal_mask = torch.tril( + torch.ones((key_size, key_size), dtype=torch.bool, device=query.device) + ).view(1, 1, key_size, key_size) + else: + causal_mask = None query = query.permute(0, 2, 1, 3) if not self.use_cache_quantization: key = key.permute(0, 2, 1, 3) value = value.permute(0, 2, 1, 3) if ( - registered_causal_mask is None + causal_mask is None and self.use_flash_attn and flash_attn_unpadded_func is not None and not self.is_fp32 @@ -561,13 +519,12 @@ class QWenAttention(nn.Module): raise Exception(_ERROR_INPUT_CPU_QUERY_WITH_FLASH_ATTN_ACTIVATED) if not self.use_cache_quantization and SUPPORT_TORCH2: - causal_mask = registered_causal_mask[ - :, :, key.size(-2) - query.size(-2): key.size(-2), :key.size(-2) - ] if attention_mask is not None: attention_mask = attention_mask.expand( -1, -1, causal_mask.size(2), -1 - ).masked_fill(~causal_mask, torch.finfo(query.dtype).min) + ) + if causal_mask is not None: + attention_mask.masked_fill(~causal_mask, torch.finfo(query.dtype).min) else: attention_mask = causal_mask attn_output = F.scaled_dot_product_attention( @@ -576,7 +533,7 @@ class QWenAttention(nn.Module): attn_weight = None else: attn_output, attn_weight = self._attn( - query, key, value, registered_causal_mask, attention_mask, head_mask + query, key, value, causal_mask, attention_mask, head_mask ) context_layer = self._merge_heads( attn_output, self.num_heads, self.head_dim @@ -592,6 +549,8 @@ class QWenAttention(nn.Module): and not self.is_fp32 ): raise ValueError("Cannot output attentions while using flash-attn") + elif not self.use_cache_quantization and SUPPORT_TORCH2: + raise ValueError("Cannot output attentions while using scaled_dot_product_attention") else: outputs += (attn_weight,) @@ -617,6 +576,7 @@ class QWenMLP(nn.Module): output = self.c_proj(intermediate_parallel) return output + class QWenBlock(nn.Module): def __init__(self, config): super().__init__() @@ -639,7 +599,6 @@ class QWenBlock(nn.Module): self, hidden_states: Optional[Tuple[torch.FloatTensor]], rotary_pos_emb_list: Optional[List[List[torch.Tensor]]] = None, - registered_causal_mask: Optional[torch.Tensor] = None, layer_past: Optional[Tuple[torch.Tensor]] = None, attention_mask: Optional[torch.FloatTensor] = None, head_mask: Optional[torch.FloatTensor] = None, @@ -653,7 +612,6 @@ class QWenBlock(nn.Module): attn_outputs = self.attn( layernorm_output, rotary_pos_emb_list, - registered_causal_mask=registered_causal_mask, layer_past=layer_past, attention_mask=attention_mask, head_mask=head_mask, @@ -687,6 +645,7 @@ class QWenPreTrainedModel(PreTrainedModel): is_parallelizable = False supports_gradient_checkpointing = True _no_split_modules = ["QWenBlock"] + _skip_keys_device_placement = "past_key_values" def __init__(self, *inputs, **kwargs): super().__init__(*inputs, **kwargs) @@ -749,25 +708,10 @@ class QWenModel(QWenPreTrainedModel): if self.rotary_ndims is not None else config.kv_channels ) - self.rotary_emb = RotaryEmbedding(dim, base=config.rotary_emb_base, seq_length_expansion=config.seq_length_expansion if hasattr(self.config, 'seq_length_expansion') else 1) + self.rotary_emb = RotaryEmbedding(dim, base=config.rotary_emb_base) self.use_flash_attn = config.use_flash_attn self.is_fp32 = not (config.bf16 or config.fp16) - if ( - self.use_flash_attn - and flash_attn_unpadded_func is not None - and not self.is_fp32 - ): - self.registered_causal_mask = None - else: - max_positions = config.max_position_embeddings - self.register_buffer( - "registered_causal_mask", - torch.tril( - torch.ones((max_positions, max_positions), dtype=torch.bool) - ).view(1, 1, max_positions, max_positions), - persistent=False, - ) self.h = nn.ModuleList( [ @@ -939,7 +883,6 @@ class QWenModel(QWenPreTrainedModel): create_custom_forward(block), hidden_states, rotary_pos_emb_list, - self.registered_causal_mask, None, attention_mask, head_mask[i], @@ -951,7 +894,6 @@ class QWenModel(QWenPreTrainedModel): hidden_states, layer_past=layer_past, rotary_pos_emb_list=rotary_pos_emb_list, - registered_causal_mask=self.registered_causal_mask, attention_mask=attention_mask, head_mask=head_mask[i], encoder_hidden_states=encoder_hidden_states, @@ -1052,7 +994,6 @@ class QWenLMHeadModel(QWenPreTrainedModel): self.lm_head.half() self.post_init() - def get_output_embeddings(self): return self.lm_head @@ -1062,22 +1003,13 @@ class QWenLMHeadModel(QWenPreTrainedModel): def prepare_inputs_for_generation( self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs ): - token_type_ids = kwargs.get("token_type_ids", None) if past_key_values: input_ids = input_ids[:, -1].unsqueeze(-1) - if token_type_ids is not None: - token_type_ids = token_type_ids[:, -1].unsqueeze(-1) - - attention_mask = kwargs.get("attention_mask", None) - position_ids = kwargs.get("position_ids", None) - if attention_mask is not None and position_ids is None: - position_ids = attention_mask.long().cumsum(-1) - 1 - position_ids.masked_fill_(attention_mask == 0, 1) - if past_key_values: - position_ids = position_ids[:, -1].unsqueeze(-1) + if input_ids.size(0) == 1: + attention_mask = None else: - position_ids = None + attention_mask = kwargs.get("attention_mask", None) if inputs_embeds is not None and past_key_values is None: model_inputs = {"inputs_embeds": inputs_embeds} @@ -1088,9 +1020,7 @@ class QWenLMHeadModel(QWenPreTrainedModel): { "past_key_values": past_key_values, "use_cache": kwargs.get("use_cache"), - "position_ids": position_ids, "attention_mask": attention_mask, - "token_type_ids": token_type_ids, } ) return model_inputs @@ -1177,7 +1107,6 @@ class QWenLMHeadModel(QWenPreTrainedModel): query: str, history: Optional[HistoryType], system: str = "You are a helpful assistant.", - append_history: bool = True, stream: Optional[bool] = _SENTINEL, stop_words_ids: Optional[List[List[int]]] = None, generation_config: Optional[GenerationConfig] = None, @@ -1189,6 +1118,10 @@ class QWenLMHeadModel(QWenPreTrainedModel): assert generation_config.chat_format == 'chatml', _ERROR_BAD_CHAT_FORMAT if history is None: history = [] + else: + # make a copy of the user's input such that is is left untouched + history = copy.deepcopy(history) + if stop_words_ids is None: stop_words_ids = [] @@ -1226,8 +1159,11 @@ class QWenLMHeadModel(QWenPreTrainedModel): errors='replace' ) - if append_history: - history.append((query, response)) + # as history is a copy of the user inputs, + # we can always return the new turn to the user. + # separating input history and output history also enables the user + # to implement more complex history management + history.append((query, response)) return response, history @@ -1341,7 +1277,7 @@ class QWenLMHeadModel(QWenPreTrainedModel): class RotaryEmbedding(torch.nn.Module): - def __init__(self, dim, base=10000, seq_length_expansion=1): + def __init__(self, dim, base=10000): super().__init__() self.dim = dim self.base = base @@ -1354,10 +1290,8 @@ class RotaryEmbedding(torch.nn.Module): self._seq_len_cached = 0 self._ntk_alpha_cached = 1.0 self._ntk_alpha_cached_list = [1.0] - self.seq_length_expansion = seq_length_expansion - def update_rotary_pos_emb_cache(self, max_seq_len, offset=0, ntk_alpha=1.0): - seqlen = max_seq_len + offset + def update_rotary_pos_emb_cache(self, seqlen, ntk_alpha=1.0): if seqlen > self._seq_len_cached or ntk_alpha != self._ntk_alpha_cached: base = self.base * ntk_alpha ** (self.dim / (self.dim - 2)) self.inv_freq = 1.0 / ( @@ -1370,7 +1304,6 @@ class RotaryEmbedding(torch.nn.Module): self._seq_len_cached = max(2 * seqlen, 16) self._ntk_alpha_cached = ntk_alpha seq = torch.arange(self._seq_len_cached, device=self.inv_freq.device) - seq = seq/self.seq_length_expansion freqs = torch.outer(seq.type_as(self.inv_freq), self.inv_freq) emb = torch.cat((freqs, freqs), dim=-1) @@ -1381,10 +1314,10 @@ class RotaryEmbedding(torch.nn.Module): cos, sin = emb.cos(), emb.sin() self._rotary_pos_emb_cache = [cos, sin] - def forward(self, max_seq_len, offset=0, ntk_alpha=1.0): - self.update_rotary_pos_emb_cache(max_seq_len, offset, ntk_alpha) + def forward(self, max_seq_len, ntk_alpha=1.0): + self.update_rotary_pos_emb_cache(max_seq_len, ntk_alpha) cos, sin = self._rotary_pos_emb_cache - return [cos[:, offset : offset + max_seq_len], sin[:, offset : offset + max_seq_len]] + return [cos[:, :max_seq_len], sin[:, :max_seq_len]] def _rotate_half(x): @@ -1396,21 +1329,28 @@ def _rotate_half(x): def apply_rotary_pos_emb(t, freqs): + """ Apply rotary embedding to the first rotary_dim of the iput + + Arguments: + t (tensor(batch_size, seq_len, n_head, head_dim)): + the input embedding/hidden states + freqs (list[tensor(1, seq_len, 1, rotary_dim), tensor(1, seq_len, 1, rotary_dim)]): + the cached cos/sin position embeddings + """ + rot_dim = freqs[0].shape[-1] cos, sin = freqs + t_float = t.float() if apply_rotary_emb_func is not None and t.is_cuda: - t_ = t.float() - cos = cos.squeeze(0).squeeze(1)[:, : cos.shape[-1] // 2] - sin = sin.squeeze(0).squeeze(1)[:, : sin.shape[-1] // 2] - output = apply_rotary_emb_func(t_, cos, sin).type_as(t) - return output + # apply_rotary_emb in flash_attn requires cos/sin to be of + # shape (seqlen, rotary_dim / 2) and apply rotary embedding + # to the first rotary_dim of the input + cos = cos.squeeze(0).squeeze(1)[:, : rot_dim // 2] + sin = sin.squeeze(0).squeeze(1)[:, : rot_dim // 2] + return apply_rotary_emb_func(t_float, cos, sin).type_as(t) else: - rot_dim = freqs[0].shape[-1] - cos, sin = freqs - t_, t_pass_ = t[..., :rot_dim], t[..., rot_dim:] - t_ = t_.float() - t_pass_ = t_pass_.float() - t_ = (t_ * cos) + (_rotate_half(t_) * sin) - return torch.cat((t_, t_pass_), dim=-1).type_as(t) + t_rot, t_pass = t_float[..., :rot_dim], t_float[..., rot_dim:] + t_rot = (t_rot * cos) + (_rotate_half(t_rot) * sin) + return torch.cat((t_rot, t_pass), dim=-1).type_as(t) class RMSNorm(torch.nn.Module): @@ -1428,3 +1368,4 @@ class RMSNorm(torch.nn.Module): else: output = self._norm(x.float()).type_as(x) return output * self.weight + diff --git a/qwen_generation_utils.py b/qwen_generation_utils.py index 4e8e1d8cadcb50ee9dffecc629d368371d268e88..0949f4f52473040913d6cab043c525a519b24efa 100644 --- a/qwen_generation_utils.py +++ b/qwen_generation_utils.py @@ -414,3 +414,4 @@ def top_k_logits(logits, top_k=0, top_p=0.0, filter_value=-float("Inf")): def switch(val1, val2, boolean): boolean = boolean.type_as(val1) return (1 - boolean) * val1 + boolean * val2 + diff --git a/tokenization_qwen.py b/tokenization_qwen.py index 2a526d66c3fc0779cb469fb9c838864ad2453d60..5c45e4a8f56ba8ed431221accb9f686066022ada 100644 --- a/tokenization_qwen.py +++ b/tokenization_qwen.py @@ -274,3 +274,4 @@ class QWenTokenizer(PreTrainedTokenizer): if skip_special_tokens: token_ids = [i for i in token_ids if i < self.eod_id] return self.tokenizer.decode(token_ids, errors=errors or self.errors) +