lrh12580
first commit
5cb6c4b
#include "cupti_add.h"
// Print one CUPTI activity record to stdout.
//
// Kernel records are written as a CSV line:
//     CUPTI,<kernel name>,<start>,<end>,<duration>
// where start/end are the raw timestamps stored in the record and
// duration is end - start. Runtime API records are written as a
// human-readable line with timestamps relative to startTimestamp.
// Records of any other kind are ignored.
//
// record: activity record to print; it is cast to the concrete record
//         type selected by record->kind.
static void
printActivity(CUpti_Activity *record)
{
    switch (record->kind)
    {
    case CUPTI_ACTIVITY_KIND_KERNEL:
    {
        CUpti_ActivityKernel4 *kernel = (CUpti_ActivityKernel4 *)record;

        // __cxa_demangle returns a malloc'd string that the caller owns, or
        // NULL when the name cannot be demangled (e.g. it is not a mangled
        // C++ symbol). Fall back to the raw name instead of passing NULL to %s.
        int status = 0;
        char *demangled = abi::__cxa_demangle(kernel->name, 0, 0, &status);
        const char *name = (status == 0 && demangled != NULL) ? demangled : kernel->name;
        if (name == NULL)
        {
            name = "<unknown>";
        }

        printf("CUPTI,%s,%llu,%llu,%llu\n",
               name,
               (unsigned long long)(kernel->start),
               (unsigned long long)(kernel->end),
               (unsigned long long)(kernel->end - kernel->start));

        // Release the demangled buffer; free(NULL) is a no-op.
        free(demangled);
        break;
    }
    case CUPTI_ACTIVITY_KIND_RUNTIME:
    {
        CUpti_ActivityAPI *api = (CUpti_ActivityAPI *)record;

        // Never hand an unset or NULL pointer to %s if the lookup fails.
        const char *callback_name = NULL;
        if (cuptiGetCallbackName(CUPTI_CB_DOMAIN_RUNTIME_API, api->cbid, &callback_name) != CUPTI_SUCCESS ||
            callback_name == NULL)
        {
            callback_name = "<unknown>";
        }

        printf("RUNTIME %s (cbid=%u) [ %llu - %llu ] process %u, thread %u, correlation %u\n",
               callback_name, api->cbid,
               (unsigned long long)(api->start - startTimestamp),
               (unsigned long long)(api->end - startTimestamp),
               api->processId, api->threadId, api->correlationId);
        break;
    }
    default:
        // Other activity kinds are intentionally not reported.
        break;
    }
}
// Buffer-request callback registered with CUPTI in initTrace().
//
// Allocates BUF_SIZE + ALIGN_SIZE bytes and returns the block, passed
// through ALIGN_BUFFER, via the output parameters. The process exits
// with status -1 if the allocation fails.
//
// buffer:        receives the buffer pointer.
// size:          receives the usable buffer size (BUF_SIZE).
// maxNumRecords: set to 0; CUPTI documents 0 as "no record limit".
//
// NOTE(review): bufferCompleted() passes the pointer it receives to free().
// That is only the malloc'd pointer if ALIGN_BUFFER left it unchanged --
// confirm against the macro definition and the allocator's alignment.
void CUPTIAPI bufferRequested(uint8_t **buffer, size_t *size, size_t *maxNumRecords)
{
    // Over-allocate by ALIGN_SIZE; presumably this leaves room for
    // ALIGN_BUFFER to move the start address forward.
    uint8_t *rawBlock = (uint8_t *)malloc(BUF_SIZE + ALIGN_SIZE);
    if (!rawBlock)
    {
        printf("Error: out of memory\n");
        exit(-1);
    }

    *buffer = ALIGN_BUFFER(rawBlock, ALIGN_SIZE);
    *size = BUF_SIZE;
    *maxNumRecords = 0;
}
// Buffer-completed callback registered with CUPTI in initTrace().
//
// Prints every record in the buffer with printActivity(), reports how many
// records were dropped for this context/stream, then frees the buffer.
//
// ctx, streamId: passed through to cuptiActivityGetNumDroppedRecords().
// buffer:        buffer to read records from; freed before returning.
// size:          unused here.
// validSize:     number of valid bytes in buffer; nothing is read or
//                reported when it is 0.
void CUPTIAPI bufferCompleted(CUcontext ctx, uint32_t streamId, uint8_t *buffer, size_t size, size_t validSize)
{
    if (validSize != 0)
    {
        CUpti_Activity *record = NULL;

        for (;;)
        {
            CUptiResult status = cuptiActivityGetNextRecord(buffer, validSize, &record);

            // CUPTI_ERROR_MAX_LIMIT_REACHED ends the walk over the buffer.
            if (status == CUPTI_ERROR_MAX_LIMIT_REACHED)
            {
                break;
            }

            // Any other non-success code is handed to CUPTI_CALL.
            if (status != CUPTI_SUCCESS)
            {
                CUPTI_CALL(status);
                continue;
            }

            printActivity(record);
        }

        // Report any records dropped from the queue.
        size_t dropped;
        CUPTI_CALL(cuptiActivityGetNumDroppedRecords(ctx, streamId, &dropped));
        if (dropped != 0)
        {
            printf("Dropped %u activity records\n", (unsigned int)dropped);
        }
    }

    free(buffer);
}
// Set up CUPTI activity tracing.
//
// Enables kernel activity records, registers the buffer callbacks, doubles
// the device buffer size and device buffer pool limit attributes (printing
// each value as read, before doubling), and stores the current CUPTI
// timestamp in startTimestamp.
void initTrace()
{
    size_t attrValue = 0;
    size_t attrValueSize = sizeof attrValue;

    // Only kernel records are enabled. Runtime API records are left disabled;
    // re-enable the line below to collect them as well.
    // CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_RUNTIME));
    CUPTI_CALL(cuptiActivityEnable(CUPTI_ACTIVITY_KIND_KERNEL));

    // Hand CUPTI the callbacks it uses to request empty buffers and to
    // return completed ones.
    CUPTI_CALL(cuptiActivityRegisterCallbacks(bufferRequested, bufferCompleted));

    // Activity attributes let the CUPTI client tune the activity API.
    // Some attributes must be set before any CUDA context is created in
    // order to take effect, e.g. to apply to all device buffer allocations
    // (see the CUPTI documentation).

    // Device buffer size: read, print, double, write back.
    CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue));
    printf("%s = %llu B\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE", (unsigned long long)attrValue);
    attrValue = attrValue * 2;
    CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_SIZE, &attrValueSize, &attrValue));

    // Device buffer pool limit: read, print, double, write back.
    CUPTI_CALL(cuptiActivityGetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue));
    printf("%s = %llu\n", "CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT", (unsigned long long)attrValue);
    attrValue = attrValue * 2;
    CUPTI_CALL(cuptiActivitySetAttribute(CUPTI_ACTIVITY_ATTR_DEVICE_BUFFER_POOL_LIMIT, &attrValueSize, &attrValue));

    // Reference point that printActivity() subtracts from runtime API timestamps.
    CUPTI_CALL(cuptiGetTimestamp(&startTimestamp));
}
// Tear down tracing: flush every outstanding CUPTI activity buffer.
// Intended to be called before the application terminates.
void finiTrace()
{
// Force flush any remaining activity buffers before termination of the application.
// NOTE(review): the literal 1 is presumably CUPTI_ACTIVITY_FLAG_FLUSH_FORCED -- confirm against the CUPTI headers.
CUPTI_CALL(cuptiActivityFlushAll(1));
}