#ifndef THC_DEVICE_ALLOCATOR_INC
#define THC_DEVICE_ALLOCATOR_INC

#include <c10/core/Allocator.h>
#include <c10/cuda/CUDAGraphsC10Utils.h>
#include <c10/cuda/CUDAMacros.h>
#include <c10/cuda/CUDAStream.h>
#include <c10/util/Registry.h>

#include <array>
#include <cstdint>
#include <memory>
#include <mutex>
#include <string>
#include <vector>
namespace c10 {

// The caching allocator will execute every registered FreeMemoryCallback when
// it cannot satisfy a request from its already cached blocks, giving other
// components a chance to release GPU memory before allocation is retried.
class C10_CUDA_API FreeMemoryCallback {
 public:
  virtual ~FreeMemoryCallback() = default;
  // Returns true if any memory was actually freed.
  virtual bool Execute() = 0;
};

C10_DECLARE_REGISTRY(FreeCudaMemoryCallbacksRegistry, FreeMemoryCallback);
#define REGISTER_FREE_MEMORY_CALLBACK(name, ...) \
  C10_REGISTER_CLASS(FreeCudaMemoryCallbacksRegistry, name, __VA_ARGS__);
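
// Illustrative sketch (not part of the original header): a hypothetical
// callback `MyCacheFreeCallback` could be registered so that the caching
// allocator can ask another component to give memory back when it runs out of
// cached blocks.
//
//   class MyCacheFreeCallback : public c10::FreeMemoryCallback {
//    public:
//     bool Execute() override {
//       // Release whatever this component caches; report whether anything
//       // was actually freed.
//       return my_cache_release_all(); // hypothetical helper
//     }
//   };
//   REGISTER_FREE_MEMORY_CALLBACK(my_cache_free_callback, MyCacheFreeCallback);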

namespace cuda {
namespace CUDACachingAllocator {

// A single statistic counter: its current value, its historical peak, and the
// running totals of increments (allocated) and decrements (freed).
struct Stat {
  int64_t current = 0;
  int64_t peak = 0;
  int64_t allocated = 0;
  int64_t freed = 0;
};

enum struct StatType : uint64_t {
  AGGREGATE = 0,
  SMALL_POOL = 1,
  LARGE_POOL = 2,
  NUM_TYPES = 3 // remember to update this whenever a new stat type is added
};

typedef std::array<Stat, static_cast<size_t>(StatType::NUM_TYPES)> StatArray;
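
// Each StatArray is indexed by StatType, e.g. (illustrative only):
//
//   const StatArray& arr = /* e.g. DeviceStats::allocated_bytes, below */;
//   int64_t total_now = arr[static_cast<size_t>(StatType::AGGREGATE)].current;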

// Struct containing memory allocator summary statistics for a device.
struct DeviceStats {
  // COUNT: allocations requested by client code
  StatArray allocation;
  // COUNT: number of allocated segments from cudaMalloc().
  StatArray segment;
  // COUNT: number of active memory blocks (allocated or used by stream)
  StatArray active;
  // COUNT: number of inactive, split memory blocks (unallocated but can't be
  // released via cudaFree)
  StatArray inactive_split;

  // SUM: bytes requested by client code
  StatArray allocated_bytes;
  // SUM: bytes reserved by this memory allocator (both free and used)
  StatArray reserved_bytes;
  // SUM: bytes within active memory blocks
  StatArray active_bytes;
  // SUM: bytes within inactive, split memory blocks
  StatArray inactive_split_bytes;

  // COUNT: total number of failed calls to CUDA malloc necessitating cache
  // flushes.
  int64_t num_alloc_retries = 0;

  // COUNT: total number of OOMs (i.e. failed calls to CUDA after cache flush)
  int64_t num_ooms = 0;

  // COUNT: total number of oversize blocks allocated from pool
  Stat oversize_allocations;

  // COUNT: total number of oversize blocks requiring malloc
  Stat oversize_segments;

  // SIZE: maximum block size that is allowed to be split.
  int64_t max_split_size = 0;
};
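
// Illustrative sketch: DeviceStats is what getDeviceStats() (declared below)
// returns, so current usage for device 0 could be read roughly as:
//
//   DeviceStats stats = CUDACachingAllocator::getDeviceStats(0);
//   const auto agg = static_cast<size_t>(StatType::AGGREGATE);
//   int64_t in_use   = stats.allocated_bytes[agg].current;
//   int64_t reserved = stats.reserved_bytes[agg].current;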

// Opaque per-allocation context captured by an optional context recorder
// (see setContextRecorder below), e.g. a stack trace of the allocation site.
struct Context {
  virtual ~Context() {}
};

typedef std::unique_ptr<Context> (*CreateContextFn)(void);

// Record attached to a block describing an allocation that lives (or lived)
// inside it.
struct History {
  void* addr;
  size_t real_size; // unrounded, actually requested size
  std::unique_ptr<Context> context; // per-watcher context
  std::unique_ptr<History> next; // when blocks are merged we keep records of
                                 // what used to be in the block
};
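
// Illustrative sketch: a context recorder can attach extra information to each
// allocation. `TagContext` and `makeTagContext` are hypothetical; the recorder
// would be installed via setContextRecorder(), declared later in this header.
//
//   struct TagContext : Context {
//     std::string tag{"my-subsystem"};
//   };
//   std::unique_ptr<Context> makeTagContext() {
//     return std::make_unique<TagContext>();
//   }
//   // CUDACachingAllocator::setContextRecorder(&makeTagContext);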

// Struct containing info of an allocation block (i.e. a fractional part of a
// cudaMalloc'd segment).
struct BlockInfo {
  int64_t size = 0;
  int32_t gc_counter = 0;
  bool allocated = false;
  bool active = false;
  History* history = nullptr; // borrowed pointer; the allocator owns the
                              // History records
};

// Struct containing info of a memory segment (i.e. one contiguous cudaMalloc).
struct SegmentInfo {
  int64_t device = 0;
  int64_t address = 0;
  int64_t total_size = 0;
  int64_t allocated_size = 0;
  int64_t active_size = 0;
  cudaStream_t stream = 0;
  bool is_large = false;
  std::vector<BlockInfo> blocks;
};
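
// Illustrative sketch: snapshot() (declared below) returns one SegmentInfo per
// cached segment, so the bytes held in active blocks could be summed as:
//
//   int64_t active = 0;
//   for (const SegmentInfo& seg : CUDACachingAllocator::snapshot()) {
//     active += seg.active_size;
//   }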

C10_CUDA_API void* raw_alloc(size_t nbytes);
C10_CUDA_API void* raw_alloc_with_stream(size_t nbytes, cudaStream_t stream);
C10_CUDA_API void raw_delete(void* ptr);

C10_CUDA_API Allocator* get();
C10_CUDA_API void init(int device_count);
C10_CUDA_API void setMemoryFraction(double fraction, int device);
C10_CUDA_API void setAllocatorSettings(const std::string& env);
C10_CUDA_API void emptyCache();
C10_CUDA_API void cacheInfo(
    int dev_id,
    size_t* cachedAndFree,
    size_t* largestBlock);
C10_CUDA_API void* getBaseAllocation(void* ptr, size_t* size);
C10_CUDA_API void recordStream(const DataPtr&, CUDAStream stream);
C10_CUDA_API DeviceStats getDeviceStats(int device);
C10_CUDA_API void resetAccumulatedStats(int device);
C10_CUDA_API void resetPeakStats(int device);
C10_CUDA_API std::vector<SegmentInfo> snapshot();
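
// Illustrative sketch of the raw entry points above: raw_alloc/raw_delete hand
// out device memory from the caching allocator without going through a
// DataPtr, and emptyCache() returns unused cached segments to the driver.
//
//   void* buf = CUDACachingAllocator::raw_alloc(1 << 20); // 1 MiB
//   // ... use buf in kernels on the current stream ...
//   CUDACachingAllocator::raw_delete(buf);
//   CUDACachingAllocator::emptyCache(); // optional: release cached memory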

// CUDAGraphs interaction: tell the allocator when stream capture for a graph
// begins and ends, and when a graph's private memory pool is destroyed.
C10_CUDA_API void notifyCaptureBegin(
    int device,
    CaptureId_t graph_id,
    MempoolId_t mempool_id);
C10_CUDA_API void notifyCaptureEnd(int device, CaptureId_t graph_id);
C10_CUDA_API void notifyCaptureDestroy(int device, MempoolId_t mempool_id);
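
// Rough call sequence around CUDA graph capture (a sketch; the exact ordering
// is owned by the CUDAGraph implementation, not guaranteed by this header):
//
//   notifyCaptureBegin(device, graph_id, mempool_id);
//   // ... cudaStreamBeginCapture / captured work / cudaStreamEndCapture ...
//   notifyCaptureEnd(device, graph_id);
//   // later, once the graph and its memory pool are torn down:
//   notifyCaptureDestroy(device, mempool_id);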

C10_CUDA_API std::mutex* getFreeMutex();

// Install a function used to create a Context object that is recorded with
// each new allocation (see Context/CreateContextFn above).
C10_CUDA_API void setContextRecorder(CreateContextFn recorder);

// Open a CUDA IPC memory handle exported by another process; the mapping is
// released when the returned shared_ptr expires.
C10_CUDA_API std::shared_ptr<void> getIpcDevPtr(std::string handle);
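
// Illustrative sketch (an assumption about usage, not a specification):
// `handle` is the serialized IPC memory handle received from the exporting
// process, e.g. over a pipe or shared file.
//
//   std::shared_ptr<void> dev_ptr = CUDACachingAllocator::getIpcDevPtr(handle);
//   // dev_ptr.get() is a device pointer valid until dev_ptr is released.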

} // namespace CUDACachingAllocator
} // namespace cuda
} // namespace c10

#endif