from cpython cimport PyBytes_AsString | |
#from cpython cimport PyByteArray_AsString # cython still not exports that | |
cdef extern from "Python.h": | |
char* PyByteArray_AsString(bytearray ba) except NULL | |
from libc.stdint cimport uint32_t, uint64_t, uintmax_t | |
def _websocket_mask_cython(object mask, object data): | |
"""Note, this function mutates its `data` argument | |
""" | |
cdef: | |
Py_ssize_t data_len, i | |
# bit operations on signed integers are implementation-specific | |
unsigned char * in_buf | |
const unsigned char * mask_buf | |
uint32_t uint32_msk | |
uint64_t uint64_msk | |
assert len(mask) == 4 | |
if not isinstance(mask, bytes): | |
mask = bytes(mask) | |
if isinstance(data, bytearray): | |
data = <bytearray>data | |
else: | |
data = bytearray(data) | |
data_len = len(data) | |
in_buf = <unsigned char*>PyByteArray_AsString(data) | |
mask_buf = <const unsigned char*>PyBytes_AsString(mask) | |
uint32_msk = (<uint32_t*>mask_buf)[0] | |
# TODO: align in_data ptr to achieve even faster speeds | |
# does it need in python ?! malloc() always aligns to sizeof(long) bytes | |
if sizeof(size_t) >= 8: | |
uint64_msk = uint32_msk | |
uint64_msk = (uint64_msk << 32) | uint32_msk | |
while data_len >= 8: | |
(<uint64_t*>in_buf)[0] ^= uint64_msk | |
in_buf += 8 | |
data_len -= 8 | |
while data_len >= 4: | |
(<uint32_t*>in_buf)[0] ^= uint32_msk | |
in_buf += 4 | |
data_len -= 4 | |
for i in range(0, data_len): | |
in_buf[i] ^= mask_buf[i] | |