chinoll
/

chatsakura-3b-int4

Text Generation

text-generation-inference

Inference Endpoints

Model card Files Files and versions Community

chinoll committed on Apr 1, 2023

Commit

fba9109

·

1 Parent(s): ada18b0

Update quant.py

Files changed (1) hide show

quant.py +4 -2

quant.py CHANGED Viewed

@@ -657,7 +657,8 @@ __global__ void VecQuant8MatMulKernel(
   atomicAdd(&mul[b * width + w], res);
 }
     '''
-    open("quant_cuda_kernel.cu","w").write(cucode).close()
     cppcode = '''
 #include <torch/all.h>
 #include <torch/python.h>
@@ -730,7 +731,8 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   m.def("vecquant8matmul", &vecquant8matmul, "Vector 8-bit Quantized Matrix Multiplication (CUDA)");
 }
     '''
-    open("quant_cuda.cpp","w").write(cppcode).close()
     setup(
         name='quant_cuda',
         ext_modules=[cpp_extension.CUDAExtension(

   atomicAdd(&mul[b * width + w], res);
 }
     '''
+    with open("quant_cuda_kernel.cu","w") as f:
+        f.write(cucode)
     cppcode = '''
 #include <torch/all.h>
 #include <torch/python.h>
   m.def("vecquant8matmul", &vecquant8matmul, "Vector 8-bit Quantized Matrix Multiplication (CUDA)");
 }
     '''
+    with open("quant_cuda.cpp","w") as f:
+        f.write(cppcode)
     setup(
         name='quant_cuda',
         ext_modules=[cpp_extension.CUDAExtension(