|
#ifndef OPENPOSE_PRIVATE_UTILITIES_AVX_HPP |
|
#define OPENPOSE_PRIVATE_UTILITIES_AVX_HPP |
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef WITH_AVX |
|
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <memory>
#include <immintrin.h>
#include <openpose/utilities/errorAndLog.hpp>
|
|
|
namespace op |
|
{ |
|
// ALIGN32(x): emits the declaration `x` with a 32-byte alignment requirement,
// using whichever compiler-specific attribute is available
// (32 bytes is presumably chosen to match 256-bit AVX loads/stores).
// Usage: ALIGN32(float values[8]);
#ifdef __GNUC__
    // GCC/Clang: the attribute trails the declarator.
    #define ALIGN32(x) x __attribute__((aligned(32)))
#elif defined(_MSC_VER)
    // MSVC: the attribute precedes the declaration. `x` must be re-emitted after it,
    // otherwise the macro would swallow the declaration entirely.
    #define ALIGN32(x) __declspec(align(32)) x
#else
    #error Unknown environment!
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
// Integer type of the bookkeeping value that aligned_malloc writes immediately in
// front of every pointer it returns (the distance back to the raw malloc pointer).
using offset_t = unsigned long long;

// Number of bytes occupied by one such bookkeeping value.
#define PTR_OFFSET_SZ sizeof(offset_t)

// align_up(num, align): rounds `num` up to the next multiple of `align`.
// The bit trick is only valid when `align` is a power of two.
// Left untouched if some other header already provides an `align_up` macro.
#ifndef align_up
    #define align_up(num, align) \
        (((num) + ((align) - 1)) & ~((align) - 1))
#endif
|
inline void * aligned_malloc(const size_t align, const size_t size) |
|
{ |
|
void * ptr = nullptr; |
|
|
|
|
|
|
|
|
|
if (align && size && (align & (align - 1)) == 0) |
|
{ |
|
|
|
|
|
const auto hdr_size = PTR_OFFSET_SZ + (align - 1); |
|
void * p = malloc(size + hdr_size); |
|
|
|
if (p) |
|
{ |
|
|
|
|
|
ptr = (void *) align_up(((uintptr_t)p + PTR_OFFSET_SZ), align); |
|
|
|
|
|
*((offset_t *)ptr - 1) = (offset_t)((uintptr_t)ptr - (uintptr_t)p); |
|
|
|
} |
|
} |
|
|
|
if (ptr == nullptr) |
|
{ |
|
error("Shared pointer could not be allocated for Array data storage.", |
|
__LINE__, __FUNCTION__, __FILE__); |
|
} |
|
|
|
return ptr; |
|
} |
|
inline void aligned_free(void * ptr) |
|
{ |
|
if (ptr == nullptr) |
|
error("Received nullptr.", __LINE__, __FUNCTION__, __FILE__); |
|
|
|
|
|
|
|
offset_t offset = *((offset_t *)ptr - 1); |
|
|
|
|
|
void * p = (void *)((uint8_t *)ptr - offset); |
|
free(p); |
|
} |
|
template<class T> |
|
std::shared_ptr<T> aligned_shared_ptr(const size_t size) |
|
{ |
|
try |
|
{ |
|
return std::shared_ptr<T>(static_cast<T*>( |
|
aligned_malloc(8*sizeof(T), sizeof(T)*size)), &aligned_free); |
|
} |
|
catch (const std::exception& e) |
|
{ |
|
error(e.what(), __LINE__, __FUNCTION__, __FILE__); |
|
return std::shared_ptr<T>{}; |
|
} |
|
} |
|
} |
|
#endif |
|
|
|
#endif |
|
|